mteb 2.0.5__py3-none-any.whl → 2.1.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (412) hide show
  1. mteb/__init__.py +10 -1
  2. mteb/_create_dataloaders.py +8 -3
  3. mteb/_evaluators/any_sts_evaluator.py +14 -12
  4. mteb/_evaluators/clustering_evaluator.py +1 -1
  5. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +2 -2
  6. mteb/_evaluators/pair_classification_evaluator.py +3 -1
  7. mteb/_evaluators/retrieval_metrics.py +0 -9
  8. mteb/_evaluators/sklearn_evaluator.py +15 -28
  9. mteb/_evaluators/text/bitext_mining_evaluator.py +4 -1
  10. mteb/_evaluators/text/summarization_evaluator.py +4 -2
  11. mteb/_evaluators/zeroshot_classification_evaluator.py +2 -2
  12. mteb/abstasks/_stratification.py +1 -1
  13. mteb/abstasks/abstask.py +6 -1
  14. mteb/abstasks/clustering.py +1 -1
  15. mteb/abstasks/dataset_card_template.md +1 -1
  16. mteb/abstasks/multilabel_classification.py +2 -2
  17. mteb/abstasks/retrieval.py +2 -1
  18. mteb/abstasks/retrieval_dataset_loaders.py +1 -1
  19. mteb/abstasks/task_metadata.py +2 -1
  20. mteb/benchmarks/_create_table.py +1 -3
  21. mteb/benchmarks/benchmark.py +18 -1
  22. mteb/benchmarks/benchmarks/__init__.py +4 -0
  23. mteb/benchmarks/benchmarks/benchmarks.py +125 -16
  24. mteb/benchmarks/get_benchmark.py +3 -1
  25. mteb/cache.py +7 -3
  26. mteb/descriptive_stats/Classification/DutchColaClassification.json +54 -0
  27. mteb/descriptive_stats/Classification/DutchGovernmentBiasClassification.json +54 -0
  28. mteb/descriptive_stats/Classification/DutchNewsArticlesClassification.json +90 -0
  29. mteb/descriptive_stats/Classification/DutchSarcasticHeadlinesClassification.json +54 -0
  30. mteb/descriptive_stats/Classification/IconclassClassification.json +96 -0
  31. mteb/descriptive_stats/Classification/OpenTenderClassification.json +222 -0
  32. mteb/descriptive_stats/Classification/VaccinChatNLClassification.json +1068 -0
  33. mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringP2P.json +45 -0
  34. mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringS2S.json +45 -0
  35. mteb/descriptive_stats/Clustering/IconclassClusteringS2S.json +48 -0
  36. mteb/descriptive_stats/Clustering/OpenTenderClusteringP2P.json +111 -0
  37. mteb/descriptive_stats/Clustering/OpenTenderClusteringS2S.json +111 -0
  38. mteb/descriptive_stats/Clustering/VABBClusteringP2P.json +60 -0
  39. mteb/descriptive_stats/Clustering/VABBClusteringS2S.json +60 -0
  40. mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XFlickr30kCoT2IRetrieval.json +243 -153
  41. mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XM3600T2IRetrieval.json +999 -629
  42. mteb/descriptive_stats/Image/Any2AnyRetrieval/OVENIT2TRetrieval.json +33 -17
  43. mteb/descriptive_stats/Image/DocumentUnderstanding/MIRACLVisionRetrieval.json +574 -0
  44. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
  45. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
  46. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
  47. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
  48. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
  49. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
  50. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
  51. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
  52. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
  53. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
  54. mteb/descriptive_stats/MultilabelClassification/CovidDisinformationNLMultiLabelClassification.json +84 -0
  55. mteb/descriptive_stats/MultilabelClassification/VABBMultiLabelClassification.json +156 -0
  56. mteb/descriptive_stats/PairClassification/SICKNLPairClassification.json +35 -0
  57. mteb/descriptive_stats/PairClassification/XLWICNLPairClassification.json +35 -0
  58. mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
  59. mteb/descriptive_stats/Retrieval/ClimateFEVERHardNegatives.v2.json +30 -0
  60. mteb/descriptive_stats/Retrieval/DBPediaHardNegatives.v2.json +30 -0
  61. mteb/descriptive_stats/Retrieval/DutchNewsArticlesRetrieval.json +30 -0
  62. mteb/descriptive_stats/Retrieval/FEVERHardNegatives.v2.json +30 -0
  63. mteb/descriptive_stats/Retrieval/HotpotQAHardNegatives.v2.json +30 -0
  64. mteb/descriptive_stats/Retrieval/LegalQANLRetrieval.json +30 -0
  65. mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
  66. mteb/descriptive_stats/Retrieval/OpenTenderRetrieval.json +30 -0
  67. mteb/descriptive_stats/Retrieval/QuoraRetrievalHardNegatives.v2.json +30 -0
  68. mteb/descriptive_stats/Retrieval/RiaNewsRetrievalHardNegatives.v2.json +30 -0
  69. mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
  70. mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
  71. mteb/descriptive_stats/Retrieval/VABBRetrieval.json +30 -0
  72. mteb/descriptive_stats/Retrieval/VDRMultilingualRetrieval.json +184 -0
  73. mteb/descriptive_stats/Retrieval/WinoGrande.json +14 -14
  74. mteb/descriptive_stats/Retrieval/bBSARDNLRetrieval.json +30 -0
  75. mteb/descriptive_stats/STS/SICK-NL-STS.json +28 -0
  76. mteb/evaluate.py +26 -6
  77. mteb/languages/check_language_code.py +11 -3
  78. mteb/languages/language_scripts.py +4 -0
  79. mteb/leaderboard/app.py +5 -3
  80. mteb/leaderboard/benchmark_selector.py +4 -2
  81. mteb/leaderboard/text_segments.py +1 -1
  82. mteb/models/cache_wrappers/cache_wrapper.py +1 -1
  83. mteb/models/instruct_wrapper.py +3 -0
  84. mteb/models/model_implementations/align_models.py +6 -0
  85. mteb/models/model_implementations/andersborges.py +51 -0
  86. mteb/models/model_implementations/ara_models.py +7 -0
  87. mteb/models/model_implementations/b1ade_models.py +1 -1
  88. mteb/models/model_implementations/bge_models.py +1 -3
  89. mteb/models/model_implementations/blip2_models.py +9 -0
  90. mteb/models/model_implementations/blip_models.py +19 -0
  91. mteb/models/model_implementations/bmretriever_models.py +1 -1
  92. mteb/models/model_implementations/cadet_models.py +8 -0
  93. mteb/models/model_implementations/cde_models.py +12 -0
  94. mteb/models/model_implementations/codefuse_models.py +15 -0
  95. mteb/models/model_implementations/codesage_models.py +12 -0
  96. mteb/models/model_implementations/cohere_models.py +1 -1
  97. mteb/models/model_implementations/colqwen_models.py +57 -0
  98. mteb/models/model_implementations/emillykkejensen_models.py +70 -0
  99. mteb/models/model_implementations/gme_v_models.py +2 -2
  100. mteb/models/model_implementations/ibm_granite_models.py +1 -1
  101. mteb/models/model_implementations/inf_models.py +3 -3
  102. mteb/models/model_implementations/jasper_models.py +253 -2
  103. mteb/models/model_implementations/jina_models.py +12 -2
  104. mteb/models/model_implementations/kalm_models.py +159 -25
  105. mteb/models/model_implementations/llm2vec_models.py +1 -1
  106. mteb/models/model_implementations/misc_models.py +8 -2
  107. mteb/models/model_implementations/moco_models.py +9 -0
  108. mteb/models/model_implementations/mxbai_models.py +1 -1
  109. mteb/models/model_implementations/openclip_models.py +16 -0
  110. mteb/models/model_implementations/piccolo_models.py +6 -0
  111. mteb/models/model_implementations/rasgaard_models.py +33 -0
  112. mteb/models/model_implementations/reasonir_model.py +1 -1
  113. mteb/models/model_implementations/salesforce_models.py +1 -1
  114. mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -1
  115. mteb/models/model_implementations/spartan8806_atles_champion.py +26 -0
  116. mteb/models/model_implementations/tarka_models.py +374 -0
  117. mteb/models/model_implementations/voyage_models.py +6 -7
  118. mteb/models/model_implementations/voyage_v.py +10 -9
  119. mteb/models/model_implementations/yuan_models.py +33 -0
  120. mteb/models/search_wrappers.py +6 -5
  121. mteb/results/task_result.py +19 -17
  122. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
  123. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
  124. mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +1 -5
  125. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
  126. mteb/tasks/classification/ara/ajgt.py +1 -2
  127. mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
  128. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
  129. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
  130. mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
  131. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
  132. mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
  133. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
  134. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
  135. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
  136. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
  137. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
  138. mteb/tasks/classification/dan/angry_tweets_classification.py +2 -3
  139. mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
  140. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
  141. mteb/tasks/classification/dan/dk_hate_classification.py +1 -2
  142. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
  143. mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
  144. mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
  145. mteb/tasks/classification/eng/arxiv_classification.py +1 -2
  146. mteb/tasks/classification/eng/banking77_classification.py +1 -2
  147. mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
  148. mteb/tasks/classification/eng/emotion_classification.py +1 -2
  149. mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
  150. mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
  151. mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
  152. mteb/tasks/classification/eng/imdb_classification.py +1 -2
  153. mteb/tasks/classification/eng/legal_bench_classification.py +15 -121
  154. mteb/tasks/classification/eng/news_classification.py +1 -2
  155. mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
  156. mteb/tasks/classification/eng/patent_classification.py +1 -2
  157. mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
  158. mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
  159. mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
  160. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
  161. mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
  162. mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
  163. mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
  164. mteb/tasks/classification/eng/ucf101_classification.py +1 -5
  165. mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
  166. mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
  167. mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
  168. mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
  169. mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
  170. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
  171. mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
  172. mteb/tasks/classification/est/estonian_valence.py +1 -2
  173. mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
  174. mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
  175. mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
  176. mteb/tasks/classification/fra/french_book_reviews.py +1 -2
  177. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
  178. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
  179. mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +1 -2
  180. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
  181. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
  182. mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
  183. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
  184. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
  185. mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
  186. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
  187. mteb/tasks/classification/jpn/wrime_classification.py +1 -2
  188. mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
  189. mteb/tasks/classification/kor/klue_tc.py +1 -2
  190. mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
  191. mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
  192. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +1 -2
  193. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
  194. mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
  195. mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
  196. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
  197. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
  198. mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
  199. mteb/tasks/classification/multilingual/scala_classification.py +1 -2
  200. mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
  201. mteb/tasks/classification/mya/myanmar_news.py +2 -3
  202. mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
  203. mteb/tasks/classification/nld/__init__.py +16 -0
  204. mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
  205. mteb/tasks/classification/nld/dutch_cola_classification.py +41 -0
  206. mteb/tasks/classification/nld/dutch_government_bias_classification.py +40 -0
  207. mteb/tasks/classification/nld/dutch_news_articles_classification.py +33 -0
  208. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +39 -0
  209. mteb/tasks/classification/nld/iconclass_classification.py +44 -0
  210. mteb/tasks/classification/nld/open_tender_classification.py +41 -0
  211. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +49 -0
  212. mteb/tasks/classification/nob/no_rec_classification.py +1 -2
  213. mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
  214. mteb/tasks/classification/ory/odia_news_classification.py +1 -2
  215. mteb/tasks/classification/pol/polish_classification.py +3 -6
  216. mteb/tasks/classification/ron/moroco.py +1 -2
  217. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
  218. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
  219. mteb/tasks/classification/rus/georeview_classification.py +1 -2
  220. mteb/tasks/classification/rus/headline_classification.py +1 -2
  221. mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
  222. mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
  223. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
  224. mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
  225. mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
  226. mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
  227. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
  228. mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
  229. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
  230. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
  231. mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
  232. mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
  233. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
  234. mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
  235. mteb/tasks/classification/swe/dalaj_classification.py +1 -2
  236. mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
  237. mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
  238. mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
  239. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
  240. mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
  241. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  242. mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
  243. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
  244. mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
  245. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
  246. mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
  247. mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
  248. mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
  249. mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
  250. mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
  251. mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
  252. mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
  253. mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
  254. mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
  255. mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
  256. mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
  257. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
  258. mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
  259. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
  260. mteb/tasks/classification/zho/cmteb_classification.py +5 -10
  261. mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
  262. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
  263. mteb/tasks/clustering/__init__.py +1 -0
  264. mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
  265. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
  266. mteb/tasks/clustering/nld/__init__.py +17 -0
  267. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +40 -0
  268. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +40 -0
  269. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +50 -0
  270. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +54 -0
  271. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +44 -0
  272. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +54 -0
  273. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +54 -0
  274. mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
  275. mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
  276. mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
  277. mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
  278. mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
  279. mteb/tasks/multilabel_classification/__init__.py +1 -0
  280. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
  281. mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
  282. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
  283. mteb/tasks/multilabel_classification/nld/__init__.py +9 -0
  284. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +91 -0
  285. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +47 -0
  286. mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
  287. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  288. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
  289. mteb/tasks/pair_classification/__init__.py +1 -0
  290. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
  291. mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
  292. mteb/tasks/pair_classification/multilingual/indic_xnli_pair_classification.py +9 -8
  293. mteb/tasks/pair_classification/nld/__init__.py +7 -0
  294. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +39 -0
  295. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +44 -0
  296. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
  297. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
  298. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
  299. mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
  300. mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
  301. mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
  302. mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
  303. mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
  304. mteb/tasks/retrieval/code/code_rag.py +8 -8
  305. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  306. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  307. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  308. mteb/tasks/retrieval/eng/__init__.py +18 -4
  309. mteb/tasks/retrieval/eng/climate_fever_retrieval.py +68 -77
  310. mteb/tasks/retrieval/eng/dbpedia_retrieval.py +55 -50
  311. mteb/tasks/retrieval/eng/fever_retrieval.py +62 -67
  312. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +0 -4
  313. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +0 -4
  314. mteb/tasks/retrieval/eng/hotpot_qa_retrieval.py +57 -67
  315. mteb/tasks/retrieval/eng/legal_summarization_retrieval.py +1 -1
  316. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
  317. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +0 -3
  318. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +0 -2
  319. mteb/tasks/retrieval/eng/oven_it2t_retrieval.py +1 -1
  320. mteb/tasks/retrieval/eng/quora_retrieval.py +51 -46
  321. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +0 -4
  322. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +0 -4
  323. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +0 -2
  324. mteb/tasks/retrieval/eng/wino_grande_retrieval.py +1 -1
  325. mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
  326. mteb/tasks/retrieval/jpn/ja_gov_faqs_retrieval.py +1 -1
  327. mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
  328. mteb/tasks/retrieval/multilingual/__init__.py +22 -0
  329. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +6 -5
  330. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +0 -2
  331. mteb/tasks/retrieval/multilingual/miracl_retrieval.py +1 -1
  332. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +2 -9
  333. mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
  334. mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
  335. mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
  336. mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
  337. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
  338. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +0 -2
  339. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +399 -0
  340. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +0 -2
  341. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +6 -5
  342. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +3 -4
  343. mteb/tasks/retrieval/nld/__init__.py +18 -4
  344. mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
  345. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +44 -0
  346. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +33 -0
  347. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +42 -0
  348. mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
  349. mteb/tasks/retrieval/nld/open_tender_retrieval.py +41 -0
  350. mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
  351. mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
  352. mteb/tasks/retrieval/nld/vabb_retrieval.py +44 -0
  353. mteb/tasks/retrieval/nob/norquad.py +2 -2
  354. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  355. mteb/tasks/retrieval/rus/__init__.py +11 -2
  356. mteb/tasks/retrieval/rus/ria_news_retrieval.py +48 -44
  357. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
  358. mteb/tasks/retrieval/tur/tur_hist_quad.py +2 -2
  359. mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
  360. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +1 -5
  361. mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
  362. mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
  363. mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
  364. mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
  365. mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
  366. mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
  367. mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
  368. mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
  369. mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
  370. mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
  371. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +1 -5
  372. mteb/tasks/retrieval/vie/fevervn_retrieval.py +1 -7
  373. mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
  374. mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
  375. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +1 -6
  376. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +1 -5
  377. mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
  378. mteb/tasks/retrieval/vie/nqvn_retrieval.py +1 -5
  379. mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
  380. mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
  381. mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
  382. mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
  383. mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
  384. mteb/tasks/sts/__init__.py +1 -0
  385. mteb/tasks/sts/nld/__init__.py +5 -0
  386. mteb/tasks/sts/nld/sick_nl_sts.py +42 -0
  387. mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
  388. mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
  389. mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
  390. mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
  391. mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
  392. mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
  393. mteb-2.1.19.dist-info/METADATA +253 -0
  394. {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/RECORD +398 -330
  395. mteb/descriptive_stats/Classification/PersianTextTone.json +0 -56
  396. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchCount.json +0 -37
  397. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDepth.json +0 -25
  398. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDistance.json +0 -25
  399. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchRelation.json +0 -25
  400. mteb/descriptive_stats/Image/VisualSTS/STS12VisualSTS.json +0 -20
  401. mteb/descriptive_stats/Image/VisualSTS/STS13VisualSTS.json +0 -20
  402. mteb/descriptive_stats/Image/VisualSTS/STS14VisualSTS.json +0 -20
  403. mteb/descriptive_stats/Image/VisualSTS/STS15VisualSTS.json +0 -20
  404. mteb/descriptive_stats/Image/VisualSTS/STS16VisualSTS.json +0 -20
  405. mteb/descriptive_stats/Image/VisualSTS/STS17MultilingualVisualSTS.json +0 -220
  406. mteb/descriptive_stats/Image/VisualSTS/STSBenchmarkMultilingualVisualSTS.json +0 -402
  407. mteb/descriptive_stats/Reranking/InstructIR.json +0 -31
  408. mteb-2.0.5.dist-info/METADATA +0 -455
  409. {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/WHEEL +0 -0
  410. {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/entry_points.txt +0 -0
  411. {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/licenses/LICENSE +0 -0
  412. {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/top_level.txt +0 -0
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class CQADupstackGisVN(AbsTaskRetrieval):
6
6
  metadata = TaskMetadata(
7
7
  name="CQADupstackGis-VN",
8
- description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
14
10
  dataset={
15
11
  "path": "GreenNode/cqadupstack-gis-vn",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class CQADupstackMathematicaVN(AbsTaskRetrieval):
6
6
  metadata = TaskMetadata(
7
7
  name="CQADupstackMathematica-VN",
8
- description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
14
10
  dataset={
15
11
  "path": "GreenNode/cqadupstack-mathematica-vn",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class CQADupstackPhysicsVN(AbsTaskRetrieval):
6
6
  metadata = TaskMetadata(
7
7
  name="CQADupstackPhysics-VN",
8
- description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
14
10
  dataset={
15
11
  "path": "GreenNode/cqadupstack-physics-vn",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class CQADupstackProgrammersRetrievalVN(AbsTaskRetrieval):
6
6
  metadata = TaskMetadata(
7
7
  name="CQADupstackProgrammers-VN",
8
- description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
14
10
  dataset={
15
11
  "path": "GreenNode/cqadupstack-programmers-vn",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class CQADupstackStatsVN(AbsTaskRetrieval):
6
6
  metadata = TaskMetadata(
7
7
  name="CQADupstackStats-VN",
8
- description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
14
10
  dataset={
15
11
  "path": "GreenNode/cqadupstack-stats-vn",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class CQADupstackTexVN(AbsTaskRetrieval):
6
6
  metadata = TaskMetadata(
7
7
  name="CQADupstackTex-VN",
8
- description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
14
10
  dataset={
15
11
  "path": "GreenNode/cqadupstack-tex-vn",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class CQADupstackUnixVN(AbsTaskRetrieval):
6
6
  metadata = TaskMetadata(
7
7
  name="CQADupstackUnix-VN",
8
- description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
14
10
  dataset={
15
11
  "path": "GreenNode/cqadupstack-unix-vn",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class CQADupstackWebmastersVN(AbsTaskRetrieval):
6
6
  metadata = TaskMetadata(
7
7
  name="CQADupstackWebmasters-VN",
8
- description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
14
10
  dataset={
15
11
  "path": "GreenNode/cqadupstack-webmasters-vn",
@@ -9,11 +9,7 @@ class CQADupstackWordpressVN(AbsTaskRetrieval):
9
9
  "path": "GreenNode/cqadupstack-wordpress-vn",
10
10
  "revision": "2230f80e1baf42aa005731ca86577621c566fcd7",
11
11
  },
12
- description="""A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research
13
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
14
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
15
- - Applies advanced embedding models to filter the translations.
16
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
12
+ description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
17
13
  reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
18
14
  type="Retrieval",
19
15
  category="t2t",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class DBPediaVN(AbsTaskRetrieval):
6
6
  metadata = TaskMetadata(
7
7
  name="DBPedia-VN",
8
- description="""A translated dataset from DBpedia-Entity is a standard test collection for entity search over the DBpedia knowledge base
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from DBpedia-Entity is a standard test collection for entity search over the DBpedia knowledge base The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="https://github.com/iai-group/DBpedia-Entity/",
14
10
  dataset={
15
11
  "path": "GreenNode/dbpedia-vn",
@@ -9,13 +9,7 @@ class FEVERVN(AbsTaskRetrieval):
9
9
  "path": "GreenNode/fever-vn",
10
10
  "revision": "a543dd8b98aed3603110c01d26db05ba39b87d49",
11
11
  },
12
- description="""A translated dataset from FEVER (Fact Extraction and VERification) consists of 185,445 claims generated by altering sentences
13
- extracted from Wikipedia and subsequently verified without knowledge of the sentence they were
14
- derived from.
15
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
16
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
17
- - Applies advanced embedding models to filter the translations.
18
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
12
+ description="A translated dataset from FEVER (Fact Extraction and VERification) consists of 185,445 claims generated by altering sentences extracted from Wikipedia and subsequently verified without knowledge of the sentence they were derived from. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
19
13
  reference="https://fever.ai/",
20
14
  type="Retrieval",
21
15
  category="t2t",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class FiQA2018VN(AbsTaskRetrieval):
6
6
  metadata = TaskMetadata(
7
7
  name="FiQA2018-VN",
8
- description="""A translated dataset from Financial Opinion Mining and Question Answering
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from Financial Opinion Mining and Question Answering The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="https://sites.google.com/view/fiqa/",
14
10
  dataset={
15
11
  "path": "GreenNode/fiqa-vn",
@@ -26,5 +26,20 @@ class GreenNodeTableMarkdownRetrieval(AbsTaskRetrieval):
26
26
  annotations_creators="human-annotated",
27
27
  dialect=[],
28
28
  sample_creation="found",
29
- bibtex_citation="", # TODO: Add bibtex citation when the paper is published
29
+ bibtex_citation=r"""
30
+ @inproceedings{10.1007/978-981-95-1746-6_17,
31
+ abstract = {Information retrieval often comes in plain text, lacking semi-structured text such as HTML and markdown, retrieving data that contains rich format such as table became non-trivial. In this paper, we tackle this challenge by introducing a new dataset, GreenNode Table Retrieval VN (GN-TRVN), which is collected from a massive corpus, a wide range of topics, and a longer context compared to ViQuAD2.0. To evaluate the effectiveness of our proposed dataset, we introduce two versions, M3-GN-VN and M3-GN-VN-Mixed, by fine-tuning the M3-Embedding model on this dataset. Experimental results show that our models consistently outperform the baselines, including the base model, across most evaluation criteria on various datasets such as VieQuADRetrieval, ZacLegalTextRetrieval, and GN-TRVN. In general, we release a more comprehensive dataset and two model versions that improve response performance for Vietnamese Markdown Table Retrieval.},
32
+ address = {Singapore},
33
+ author = {Pham, Bao Loc
34
+ and Hoang, Quoc Viet
35
+ and Luu, Quy Tung
36
+ and Vo, Trong Thu},
37
+ booktitle = {Proceedings of the Fifth International Conference on Intelligent Systems and Networks},
38
+ isbn = {978-981-95-1746-6},
39
+ pages = {153--163},
40
+ publisher = {Springer Nature Singapore},
41
+ title = {GN-TRVN: A Benchmark for Vietnamese Table Markdown Retrieval Task},
42
+ year = {2026},
43
+ }
44
+ """,
30
45
  )
@@ -9,12 +9,7 @@ class HotpotQAVN(AbsTaskRetrieval):
9
9
  "path": "GreenNode/hotpotqa-vn",
10
10
  "revision": "8a5220c7af5084f0d5d2afeb74f9c2b41b759ff0",
11
11
  },
12
- description="""A translated dataset from HotpotQA is a question answering dataset featuring natural, multi-hop questions, with strong
13
- supervision for supporting facts to enable more explainable question answering systems.
14
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
15
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
16
- - Applies advanced embedding models to filter the translations.
17
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
12
+ description="A translated dataset from HotpotQA is a question answering dataset featuring natural, multi-hop questions, with strong supervision for supporting facts to enable more explainable question answering systems. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
18
13
  reference="https://hotpotqa.github.io/",
19
14
  type="Retrieval",
20
15
  category="t2t",
@@ -9,11 +9,7 @@ class MSMARCOVN(AbsTaskRetrieval):
9
9
  "path": "GreenNode/msmarco-vn",
10
10
  "revision": "85d1ad4cc9070b8d019d65f5af1631a2ab91e294",
11
11
  },
12
- description="""A translated dataset from MS MARCO is a collection of datasets focused on deep learning in search
13
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
14
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
15
- - Applies advanced embedding models to filter the translations.
16
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
12
+ description="A translated dataset from MS MARCO is a collection of datasets focused on deep learning in search The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
17
13
  reference="https://microsoft.github.io/msmarco/",
18
14
  type="Retrieval",
19
15
  category="t2t",
@@ -9,11 +9,7 @@ class NFCorpusVN(AbsTaskRetrieval):
9
9
  "path": "GreenNode/nfcorpus-vn",
10
10
  "revision": "a13d72fbb859be3dc19ab669d1ec9510407d2dcd",
11
11
  },
12
- description="""A translated dataset from NFCorpus: A Full-Text Learning to Rank Dataset for Medical Information Retrieval
13
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
14
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
15
- - Applies advanced embedding models to filter the translations.
16
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
12
+ description="A translated dataset from NFCorpus: A Full-Text Learning to Rank Dataset for Medical Information Retrieval The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
17
13
  reference="https://www.cl.uni-heidelberg.de/statnlpgroup/nfcorpus/",
18
14
  type="Retrieval",
19
15
  category="t2t",
@@ -9,11 +9,7 @@ class NQVN(AbsTaskRetrieval):
9
9
  "path": "GreenNode/nq-vn",
10
10
  "revision": "40a6d7f343b9c9f4855a426d8c431ad5f8aaf56b",
11
11
  },
12
- description="""A translated dataset from NFCorpus: A Full-Text Learning to Rank Dataset for Medical Information Retrieval
13
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
14
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
15
- - Applies advanced embedding models to filter the translations.
16
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
12
+ description="A translated dataset from NFCorpus: A Full-Text Learning to Rank Dataset for Medical Information Retrieval The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
17
13
  reference="https://ai.google.com/research/NaturalQuestions/",
18
14
  type="Retrieval",
19
15
  category="t2t",
@@ -9,12 +9,7 @@ class QuoraVN(AbsTaskRetrieval):
9
9
  "path": "GreenNode/quora-vn",
10
10
  "revision": "3363d81e41b67c1032bf3b234882a03d271e2289",
11
11
  },
12
- description="""A translated dataset from QuoraRetrieval is based on questions that are marked as duplicates on the Quora platform. Given a
13
- question, find other (duplicate) questions.
14
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
15
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
16
- - Applies advanced embedding models to filter the translations.
17
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
12
+ description="A translated dataset from QuoraRetrieval is based on questions that are marked as duplicates on the Quora platform. Given a question, find other (duplicate) questions. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
18
13
  reference="https://quoradata.quora.com/First-Quora-Dataset-Release-Question-Pairs",
19
14
  type="Retrieval",
20
15
  category="t2t",
@@ -9,11 +9,7 @@ class SciFactVN(AbsTaskRetrieval):
9
9
  "path": "GreenNode/scifact-vn",
10
10
  "revision": "483a7cf890c523c954e7751d328c5bb65061dcff",
11
11
  },
12
- description="""A translated dataset from SciFact verifies scientific claims using evidence from the research literature containing scientific paper abstracts.
13
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
14
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
15
- - Applies advanced embedding models to filter the translations.
16
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
12
+ description="A translated dataset from SciFact verifies scientific claims using evidence from the research literature containing scientific paper abstracts. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
17
13
  reference="https://github.com/allenai/scifact",
18
14
  type="Retrieval",
19
15
  category="t2t",
@@ -9,12 +9,7 @@ class SCIDOCSVN(AbsTaskRetrieval):
9
9
  "path": "GreenNode/scidocs-vn",
10
10
  "revision": "724cddfa9d328a193f303a0a9b7789468ac79f26",
11
11
  },
12
- description="""A translated dataset from SciDocs, a new evaluation benchmark consisting of seven document-level tasks ranging from citation
13
- prediction, to document classification and recommendation.
14
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
15
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
16
- - Applies advanced embedding models to filter the translations.
17
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
12
+ description="A translated dataset from SciDocs, a new evaluation benchmark consisting of seven document-level tasks ranging from citation prediction, to document classification and recommendation. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
18
13
  reference="https://allenai.org/data/scidocs",
19
14
  type="Retrieval",
20
15
  category="t2t",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class Touche2020VN(AbsTaskRetrieval):
6
6
  metadata = TaskMetadata(
7
7
  name="Touche2020-VN",
8
- description="""A translated dataset from Touché Task 1: Argument Retrieval for Controversial Questions
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from Touché Task 1: Argument Retrieval for Controversial Questions The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="https://webis.de/events/touche-20/shared-task-1.html",
14
10
  dataset={
15
11
  "path": "GreenNode/webis-touche2020-vn",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class TRECCOVIDVN(AbsTaskRetrieval):
6
6
  metadata = TaskMetadata(
7
7
  name="TRECCOVID-VN",
8
- description="""A translated dataset from TRECCOVID is an ad-hoc search challenge based on the COVID-19 dataset containing scientific articles related to the COVID-19 pandemic.
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from TRECCOVID is an ad-hoc search challenge based on the COVID-19 dataset containing scientific articles related to the COVID-19 pandemic. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="https://ir.nist.gov/covidSubmit/index.html",
14
10
  dataset={
15
11
  "path": "GreenNode/trec-covid-vn",
@@ -7,6 +7,7 @@ from .fra import *
7
7
  from .jpn import *
8
8
  from .kor import *
9
9
  from .multilingual import *
10
+ from .nld import *
10
11
  from .pol import *
11
12
  from .por import *
12
13
  from .ron import *
@@ -0,0 +1,5 @@
1
+ from .sick_nl_sts import SICKNLSTS
2
+
3
+ __all__ = [
4
+ "SICKNLSTS",
5
+ ]
@@ -0,0 +1,42 @@
1
+ from mteb.abstasks import AbsTaskSTS
2
+ from mteb.abstasks.task_metadata import TaskMetadata
3
+
4
+
5
+ class SICKNLSTS(AbsTaskSTS):
6
+ fast_loading = True
7
+ metadata = TaskMetadata(
8
+ name="SICK-NL-STS",
9
+ dataset={
10
+ "path": "clips/mteb-nl-sick-sts-pr",
11
+ "revision": "7f88f003fc4e37ed8cd9ade84e390d871b032fef",
12
+ },
13
+ description="SICK-NL (read: signal), a dataset targeting Natural Language Inference in Dutch. SICK-NL is "
14
+ "obtained by translating the SICK dataset of (Marelli et al., 2014) from English into Dutch.",
15
+ reference="https://aclanthology.org/2021.eacl-main.126/",
16
+ type="STS",
17
+ category="t2t",
18
+ modalities=["text"],
19
+ eval_splits=["test"],
20
+ eval_langs=["nld-Latn"],
21
+ main_score="cosine_spearman",
22
+ date=("2012-01-01", "2017-12-31"),
23
+ domains=["News", "Social", "Web", "Spoken", "Written"],
24
+ task_subtypes=[],
25
+ license="mit",
26
+ annotations_creators="human-annotated",
27
+ dialect=[],
28
+ sample_creation="machine-translated",
29
+ bibtex_citation=r"""
30
+ @inproceedings{wijnholds2021sick,
31
+ author = {Wijnholds, Gijs and Moortgat, Michael},
32
+ booktitle = {Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume},
33
+ pages = {1474--1479},
34
+ title = {SICK-NL: A Dataset for Dutch Natural Language Inference},
35
+ year = {2021},
36
+ }
37
+ """,
38
+ prompt={"query": "Haal semantisch vergelijkbare tekst op"},
39
+ )
40
+
41
+ min_score = 0
42
+ max_score = 5
@@ -9,11 +9,7 @@ class BiossesSTSVN(AbsTaskSTS):
9
9
  "path": "GreenNode/biosses-sts-vn",
10
10
  "revision": "1dae4a6df91c0852680cd4ab48c8c1d8a9ed49b2",
11
11
  },
12
- description="""A translated dataset from Biomedical Semantic Similarity Estimation.
13
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
14
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
15
- - Applies advanced embedding models to filter the translations.
16
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
12
+ description="A translated dataset from Biomedical Semantic Similarity Estimation. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
17
13
  reference="https://tabilab.cmpe.boun.edu.tr/BIOSSES/DataSet.html",
18
14
  type="STS",
19
15
  category="t2c",
@@ -9,11 +9,7 @@ class SickrSTSVN(AbsTaskSTS):
9
9
  "path": "GreenNode/sickr-sts-vn",
10
10
  "revision": "bc89f0401983c456b609f7fb324278f346b2cccf",
11
11
  },
12
- description="""A translated dataset from Semantic Textual Similarity SICK-R dataset as described here:
13
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
14
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
15
- - Applies advanced embedding models to filter the translations.
16
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
12
+ description="A translated dataset from Semantic Textual Similarity SICK-R dataset as described here: The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
17
13
  reference="https://aclanthology.org/2020.lrec-1.207",
18
14
  type="STS",
19
15
  category="t2c",
@@ -9,11 +9,7 @@ class STSBenchmarkSTSVN(AbsTaskSTS):
9
9
  "path": "GreenNode/stsbenchmark-sts-vn",
10
10
  "revision": "f24d66738cda4a02138ada5af7689a92ce1fcad6",
11
11
  },
12
- description="""A translated dataset from Semantic Textual Similarity Benchmark (STSbenchmark) dataset.
13
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
14
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
15
- - Applies advanced embedding models to filter the translations.
16
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
12
+ description="A translated dataset from Semantic Textual Similarity Benchmark (STSbenchmark) dataset. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
17
13
  reference="https://github.com/PhilipMay/stsb-multi-mt/",
18
14
  type="STS",
19
15
  category="t2c",
@@ -9,7 +9,7 @@ from mteb.abstasks.zeroshot_classification import (
9
9
  class GTSRBZeroShotClassification(AbsTaskZeroShotClassification):
10
10
  metadata = TaskMetadata(
11
11
  name="GTSRBZeroShot",
12
- description="""The German Traffic Sign Recognition Benchmark (GTSRB) is a multi-class classification dataset for traffic signs. It consists of dataset of more than 50,000 traffic sign images. The dataset comprises 43 classes with unbalanced class frequencies.""",
12
+ description="The German Traffic Sign Recognition Benchmark (GTSRB) is a multi-class classification dataset for traffic signs. It consists of dataset of more than 50,000 traffic sign images. The dataset comprises 43 classes with unbalanced class frequencies.",
13
13
  reference="https://benchmark.ini.rub.de/",
14
14
  dataset={
15
15
  "path": "clip-benchmark/wds_gtsrb",
@@ -9,7 +9,7 @@ from mteb.abstasks.zeroshot_classification import (
9
9
  class PatchCamelyonZeroShotClassification(AbsTaskZeroShotClassification):
10
10
  metadata = TaskMetadata(
11
11
  name="PatchCamelyonZeroShot",
12
- description="""Histopathology diagnosis classification dataset.""",
12
+ description="Histopathology diagnosis classification dataset.",
13
13
  reference="https://link.springer.com/chapter/10.1007/978-3-030-00934-2_24",
14
14
  dataset={
15
15
  "path": "clip-benchmark/wds_vtab-pcam",
@@ -7,11 +7,7 @@ from mteb.abstasks.zeroshot_classification import (
7
7
  class UCF101ZeroShotClassification(AbsTaskZeroShotClassification):
8
8
  metadata = TaskMetadata(
9
9
  name="UCF101ZeroShot",
10
- description="""UCF101 is an action recognition data set of realistic
11
- action videos collected from YouTube, having 101 action categories. This
12
- version of the dataset does not contain images but images saved frame by
13
- frame. Train and test splits are generated based on the authors' first
14
- version train/test list.""",
10
+ description="UCF101 is an action recognition data set of realistic action videos collected from YouTube, having 101 action categories. This version of the dataset does not contain images but images saved frame by frame. Train and test splits are generated based on the authors' first version train/test list.",
15
11
  reference="https://huggingface.co/datasets/flwrlabs/ucf101",
16
12
  dataset={
17
13
  "path": "flwrlabs/ucf101",