mteb 2.1.7__py3-none-any.whl → 2.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239)
  1. mteb/_create_dataloaders.py +6 -3
  2. mteb/_evaluators/any_sts_evaluator.py +14 -12
  3. mteb/_evaluators/clustering_evaluator.py +1 -1
  4. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +2 -2
  5. mteb/_evaluators/pair_classification_evaluator.py +3 -1
  6. mteb/_evaluators/sklearn_evaluator.py +15 -28
  7. mteb/_evaluators/text/bitext_mining_evaluator.py +4 -1
  8. mteb/_evaluators/text/summarization_evaluator.py +4 -2
  9. mteb/_evaluators/zeroshot_classification_evaluator.py +2 -2
  10. mteb/abstasks/clustering.py +1 -1
  11. mteb/abstasks/multilabel_classification.py +2 -2
  12. mteb/abstasks/task_metadata.py +1 -0
  13. mteb/benchmarks/benchmark.py +9 -0
  14. mteb/benchmarks/benchmarks/__init__.py +2 -0
  15. mteb/benchmarks/benchmarks/benchmarks.py +40 -1
  16. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
  17. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
  18. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
  19. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
  20. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
  21. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
  22. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
  23. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
  24. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
  25. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
  26. mteb/models/cache_wrappers/cache_wrapper.py +1 -1
  27. mteb/models/model_implementations/align_models.py +6 -0
  28. mteb/models/model_implementations/ara_models.py +7 -0
  29. mteb/models/model_implementations/blip2_models.py +9 -0
  30. mteb/models/model_implementations/blip_models.py +19 -0
  31. mteb/models/model_implementations/cadet_models.py +8 -0
  32. mteb/models/model_implementations/cde_models.py +12 -0
  33. mteb/models/model_implementations/codefuse_models.py +15 -0
  34. mteb/models/model_implementations/codesage_models.py +12 -0
  35. mteb/models/model_implementations/misc_models.py +6 -0
  36. mteb/models/model_implementations/moco_models.py +9 -0
  37. mteb/models/model_implementations/openclip_models.py +16 -0
  38. mteb/models/model_implementations/piccolo_models.py +6 -0
  39. mteb/models/model_implementations/rasgaard_models.py +7 -1
  40. mteb/models/model_implementations/tarka_models.py +317 -0
  41. mteb/models/search_wrappers.py +5 -5
  42. mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +1 -5
  43. mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
  44. mteb/tasks/classification/ara/ajgt.py +1 -2
  45. mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
  46. mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
  47. mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
  48. mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
  49. mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
  50. mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
  51. mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
  52. mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
  53. mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
  54. mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
  55. mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
  56. mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
  57. mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
  58. mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
  59. mteb/tasks/classification/dan/dk_hate_classification.py +1 -2
  60. mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
  61. mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
  62. mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
  63. mteb/tasks/classification/eng/arxiv_classification.py +1 -2
  64. mteb/tasks/classification/eng/banking77_classification.py +1 -2
  65. mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
  66. mteb/tasks/classification/eng/emotion_classification.py +1 -2
  67. mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
  68. mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
  69. mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
  70. mteb/tasks/classification/eng/imdb_classification.py +1 -2
  71. mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
  72. mteb/tasks/classification/eng/news_classification.py +1 -2
  73. mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
  74. mteb/tasks/classification/eng/patent_classification.py +1 -2
  75. mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
  76. mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
  77. mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
  78. mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
  79. mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
  80. mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
  81. mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
  82. mteb/tasks/classification/eng/ucf101_classification.py +1 -5
  83. mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
  84. mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
  85. mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
  86. mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
  87. mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
  88. mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
  89. mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
  90. mteb/tasks/classification/est/estonian_valence.py +1 -2
  91. mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
  92. mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
  93. mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
  94. mteb/tasks/classification/fra/french_book_reviews.py +1 -2
  95. mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
  96. mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
  97. mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +1 -2
  98. mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
  99. mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
  100. mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
  101. mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
  102. mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
  103. mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
  104. mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
  105. mteb/tasks/classification/jpn/wrime_classification.py +1 -2
  106. mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
  107. mteb/tasks/classification/kor/klue_tc.py +1 -2
  108. mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
  109. mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
  110. mteb/tasks/classification/kur/kurdish_sentiment_classification.py +1 -2
  111. mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
  112. mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
  113. mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
  114. mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
  115. mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
  116. mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
  117. mteb/tasks/classification/multilingual/scala_classification.py +1 -2
  118. mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
  119. mteb/tasks/classification/mya/myanmar_news.py +1 -2
  120. mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
  121. mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +1 -3
  122. mteb/tasks/classification/nob/no_rec_classification.py +1 -2
  123. mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
  124. mteb/tasks/classification/ory/odia_news_classification.py +1 -2
  125. mteb/tasks/classification/pol/polish_classification.py +3 -6
  126. mteb/tasks/classification/ron/moroco.py +1 -2
  127. mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
  128. mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
  129. mteb/tasks/classification/rus/georeview_classification.py +1 -2
  130. mteb/tasks/classification/rus/headline_classification.py +1 -2
  131. mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
  132. mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
  133. mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
  134. mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
  135. mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
  136. mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
  137. mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
  138. mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
  139. mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
  140. mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
  141. mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
  142. mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
  143. mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
  144. mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
  145. mteb/tasks/classification/swe/dalaj_classification.py +1 -2
  146. mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
  147. mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
  148. mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
  149. mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
  150. mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
  151. mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
  152. mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
  153. mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
  154. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
  155. mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
  156. mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
  157. mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
  158. mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
  159. mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
  160. mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
  161. mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
  162. mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
  163. mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
  164. mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
  165. mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
  166. mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
  167. mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
  168. mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
  169. mteb/tasks/classification/zho/cmteb_classification.py +5 -10
  170. mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
  171. mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
  172. mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
  173. mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
  174. mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
  175. mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
  176. mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
  177. mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
  178. mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
  179. mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
  180. mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
  181. mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
  182. mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
  183. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
  184. mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
  185. mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
  186. mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
  187. mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
  188. mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
  189. mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
  190. mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
  191. mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
  192. mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
  193. mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
  194. mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
  195. mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
  196. mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
  197. mteb/tasks/retrieval/multilingual/__init__.py +22 -0
  198. mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
  199. mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
  200. mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
  201. mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
  202. mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +399 -0
  203. mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
  204. mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
  205. mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +1 -5
  206. mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
  207. mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
  208. mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
  209. mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
  210. mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
  211. mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
  212. mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
  213. mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
  214. mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
  215. mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
  216. mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +1 -5
  217. mteb/tasks/retrieval/vie/fevervn_retrieval.py +1 -7
  218. mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
  219. mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +1 -6
  220. mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +1 -5
  221. mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
  222. mteb/tasks/retrieval/vie/nqvn_retrieval.py +1 -5
  223. mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
  224. mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
  225. mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
  226. mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
  227. mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
  228. mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
  229. mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
  230. mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
  231. mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
  232. mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
  233. mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
  234. {mteb-2.1.7.dist-info → mteb-2.1.8.dist-info}/METADATA +1 -1
  235. {mteb-2.1.7.dist-info → mteb-2.1.8.dist-info}/RECORD +239 -228
  236. {mteb-2.1.7.dist-info → mteb-2.1.8.dist-info}/WHEEL +0 -0
  237. {mteb-2.1.7.dist-info → mteb-2.1.8.dist-info}/entry_points.txt +0 -0
  238. {mteb-2.1.7.dist-info → mteb-2.1.8.dist-info}/licenses/LICENSE +0 -0
  239. {mteb-2.1.7.dist-info → mteb-2.1.8.dist-info}/top_level.txt +0 -0
@@ -5,12 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class AmazonPolarityVNClassification(AbsTaskClassification):
6
6
  metadata = TaskMetadata(
7
7
  name="AmazonPolarityVNClassification",
8
- description="""A collection of translated Amazon customer reviews annotated for polarity classification.
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.
13
- """,
8
+ description="A collection of translated Amazon customer reviews annotated for polarity classification. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
14
9
  reference="https://huggingface.co/datasets/amazon_polarity",
15
10
  dataset={
16
11
  "path": "GreenNode/amazon-polarity-vn",
@@ -9,11 +9,7 @@ class AmazonReviewsVNClassification(AbsTaskClassification):
9
9
  "path": "GreenNode/amazon-reviews-multi-vn",
10
10
  "revision": "27da94deb6d4f44af789a3d70750fa506b79f189",
11
11
  },
12
- description="""A collection of translated Amazon reviews specifically designed to aid research in multilingual text classification.
13
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
14
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
15
- - Applies advanced embedding models to filter the translations.
16
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
12
+ description="A collection of translated Amazon reviews specifically designed to aid research in multilingual text classification. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
17
13
  reference="https://arxiv.org/abs/2010.02573",
18
14
  category="t2c",
19
15
  type="Classification",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class Banking77VNClassification(AbsTaskClassification):
6
6
  metadata = TaskMetadata(
7
7
  name="Banking77VNClassification",
8
- description="""A translated dataset composed of online banking queries annotated with their corresponding intents.
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset composed of online banking queries annotated with their corresponding intents. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="https://arxiv.org/abs/2003.04807",
14
10
  dataset={
15
11
  "path": "GreenNode/banking77-vn",
@@ -7,11 +7,7 @@ class EmotionVNClassification(AbsTaskClassification):
7
7
 
8
8
  metadata = TaskMetadata(
9
9
  name="EmotionVNClassification",
10
- description="""Emotion is a translated dataset of Vietnamese from English Twitter messages with six basic emotions: anger, fear, joy, love, sadness, and surprise.
11
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
12
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
13
- - Applies advanced embedding models to filter the translations.
14
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
10
+ description="Emotion is a translated dataset of Vietnamese from English Twitter messages with six basic emotions: anger, fear, joy, love, sadness, and surprise. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
15
11
  reference="https://www.aclweb.org/anthology/D18-1404",
16
12
  dataset={
17
13
  "path": "GreenNode/emotion-vn",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class ImdbVNClassification(AbsTaskClassification):
6
6
  metadata = TaskMetadata(
7
7
  name="ImdbVNClassification",
8
- description="""A translated dataset of large movie reviews annotated for sentiment classification.
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset of large movie reviews annotated for sentiment classification. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  dataset={
14
10
  "path": "GreenNode/imdb-vn",
15
11
  "revision": "0dccb383ee26c90c99d03c8674cf40de642f099a",
@@ -9,11 +9,7 @@ class MassiveIntentVNClassification(AbsTaskClassification):
9
9
  "path": "GreenNode/amazon-massive-intent-vn",
10
10
  "revision": "35c7ced69f958dbbaa24f792db4a9250e461866d",
11
11
  },
12
- description="""A translated dataset from MASSIVE: A 1M-Example Multilingual Natural Language Understanding Dataset with 51 Typologically-Diverse Languages
13
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
14
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
15
- - Applies advanced embedding models to filter the translations.
16
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
12
+ description="A translated dataset from MASSIVE: A 1M-Example Multilingual Natural Language Understanding Dataset with 51 Typologically-Diverse Languages The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
17
13
  reference="https://arxiv.org/abs/2204.08582#:~:text=MASSIVE%20contains%201M%20realistic%2C%20parallel,diverse%20languages%20from%2029%20genera.",
18
14
  category="t2c",
19
15
  type="Classification",
@@ -9,11 +9,7 @@ class MassiveScenarioVNClassification(AbsTaskClassification):
9
9
  "path": "GreenNode/amazon-massive-scenario-vn",
10
10
  "revision": "a82e282d9f5aec1a8cf7d868ce40f70669c16b89",
11
11
  },
12
- description="""A translated dataset from MASSIVE: A 1M-Example Multilingual Natural Language Understanding Dataset with 51 Typologically-Diverse Languages
13
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
14
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
15
- - Applies advanced embedding models to filter the translations.
16
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
12
+ description="A translated dataset from MASSIVE: A 1M-Example Multilingual Natural Language Understanding Dataset with 51 Typologically-Diverse Languages The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
17
13
  reference="https://arxiv.org/abs/2204.08582#:~:text=MASSIVE%20contains%201M%20realistic%2C%20parallel,diverse%20languages%20from%2029%20genera.",
18
14
  category="t2c",
19
15
  type="Classification",
@@ -9,11 +9,7 @@ class MTOPDomainVNClassification(AbsTaskClassification):
9
9
  "path": "GreenNode/mtop-domain-vn",
10
10
  "revision": "6e1ec8c54c018151c77472d94b1c0765230cf6ca",
11
11
  },
12
- description="""A translated dataset from MTOP: Multilingual Task-Oriented Semantic Parsing
13
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
14
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
15
- - Applies advanced embedding models to filter the translations.
16
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
12
+ description="A translated dataset from MTOP: Multilingual Task-Oriented Semantic Parsing The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
17
13
  reference="https://arxiv.org/pdf/2008.09335.pdf",
18
14
  category="t2c",
19
15
  type="Classification",
@@ -9,11 +9,7 @@ class MTOPIntentVNClassification(AbsTaskClassification):
9
9
  "path": "GreenNode/mtop-intent-vn",
10
10
  "revision": "c4e81a5c9a813a0142d905e261e5a446cc6fbc4a",
11
11
  },
12
- description="""A translated dataset from MTOP: Multilingual Task-Oriented Semantic Parsing
13
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
14
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
15
- - Applies advanced embedding models to filter the translations.
16
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
12
+ description="A translated dataset from MTOP: Multilingual Task-Oriented Semantic Parsing The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
17
13
  reference="https://arxiv.org/pdf/2008.09335.pdf",
18
14
  category="t2c",
19
15
  type="Classification",
@@ -7,11 +7,7 @@ class ToxicConversationsVNClassification(AbsTaskClassification):
7
7
 
8
8
  metadata = TaskMetadata(
9
9
  name="ToxicConversationsVNClassification",
10
- description="""A translated dataset from Collection of comments from the Civil Comments platform together with annotations if the comment is toxic or not.
11
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
12
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
13
- - Applies advanced embedding models to filter the translations.
14
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
10
+ description="A translated dataset from Collection of comments from the Civil Comments platform together with annotations if the comment is toxic or not. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
15
11
  reference="https://www.kaggle.com/competitions/jigsaw-unintended-bias-in-toxicity-classification/overview",
16
12
  dataset={
17
13
  "path": "GreenNode/toxic-conversations-50k-vn",
@@ -7,11 +7,7 @@ class TweetSentimentExtractionVNClassification(AbsTaskClassification):
7
7
 
8
8
  metadata = TaskMetadata(
9
9
  name="TweetSentimentExtractionVNClassification",
10
- description="""A collection of translated tweets annotated for sentiment extraction.
11
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
12
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
13
- - Applies advanced embedding models to filter the translations.
14
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
10
+ description="A collection of translated tweets annotated for sentiment extraction. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
15
11
  reference="https://www.kaggle.com/competitions/tweet-sentiment-extraction/overview",
16
12
  dataset={
17
13
  "path": "GreenNode/tweet-sentiment-extraction-vn",
@@ -45,8 +45,7 @@ class VieStudentFeedbackClassification(AbsTaskClassification):
45
45
  class VieStudentFeedbackClassificationV2(AbsTaskClassification):
46
46
  metadata = TaskMetadata(
47
47
  name="VieStudentFeedbackClassification.v2",
48
- description="""A Vietnamese dataset for classification of student feedback
49
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
48
+ description="A Vietnamese dataset for classification of student feedback This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
50
49
  reference="https://ieeexplore.ieee.org/document/8573337",
51
50
  dataset={
52
51
  "path": "mteb/vie_student_feedback",
@@ -79,8 +79,7 @@ Lan, Zhenzhong },
79
79
  class TNewsV2(AbsTaskClassification):
80
80
  metadata = TaskMetadata(
81
81
  name="TNews.v2",
82
- description="""Short Text Classification for News
83
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
82
+ description="Short Text Classification for News This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
84
83
  reference="https://www.cluebenchmarks.com/introduce.html",
85
84
  dataset={
86
85
  "path": "mteb/t_news",
@@ -229,8 +228,7 @@ Lan, Zhenzhong },
229
228
  class IFlyTekV2(AbsTaskClassification):
230
229
  metadata = TaskMetadata(
231
230
  name="IFlyTek.v2",
232
- description="""Long Text classification for the description of Apps
233
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
231
+ description="Long Text classification for the description of Apps This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
234
232
  reference="https://www.cluebenchmarks.com/introduce.html",
235
233
  dataset={
236
234
  "path": "mteb/i_fly_tek",
@@ -335,8 +333,7 @@ class MultilingualSentiment(AbsTaskClassification):
335
333
  class MultilingualSentimentV2(AbsTaskClassification):
336
334
  metadata = TaskMetadata(
337
335
  name="MultilingualSentiment.v2",
338
- description="""A collection of multilingual sentiments datasets grouped into 3 classes -- positive, neutral, negative
339
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
336
+ description="A collection of multilingual sentiments datasets grouped into 3 classes -- positive, neutral, negative This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
340
337
  reference="https://github.com/tyqiangz/multilingual-sentiment-datasets",
341
338
  dataset={
342
339
  "path": "mteb/multilingual_sentiment",
@@ -403,8 +400,7 @@ class JDReview(AbsTaskClassification):
403
400
  class JDReviewV2(AbsTaskClassification):
404
401
  metadata = TaskMetadata(
405
402
  name="JDReview.v2",
406
- description="""review for iphone
407
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
403
+ description="review for iphone This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
408
404
  reference="https://aclanthology.org/2023.nodalida-1.20/",
409
405
  dataset={
410
406
  "path": "mteb/jd_review",
@@ -514,8 +510,7 @@ class Waimai(AbsTaskClassification):
514
510
  class WaimaiV2(AbsTaskClassification):
515
511
  metadata = TaskMetadata(
516
512
  name="Waimai.v2",
517
- description="""Sentiment Analysis of user reviews on takeaway platforms
518
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
513
+ description="Sentiment Analysis of user reviews on takeaway platforms This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
519
514
  reference="https://aclanthology.org/2023.nodalida-1.20/",
520
515
  dataset={
521
516
  "path": "mteb/waimai",
@@ -48,8 +48,7 @@ class YueOpenriceReviewClassification(AbsTaskClassification):
48
48
  class YueOpenriceReviewClassificationV2(AbsTaskClassification):
49
49
  metadata = TaskMetadata(
50
50
  name="YueOpenriceReviewClassification.v2",
51
- description="""A Cantonese dataset for review classification
52
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
51
+ description="A Cantonese dataset for review classification This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
53
52
  reference="https://github.com/Christainx/Dataset_Cantonese_Openrice",
54
53
  dataset={
55
54
  "path": "mteb/yue_openrice_review",
@@ -45,8 +45,7 @@ class IsiZuluNewsClassification(AbsTaskClassification):
45
45
  class IsiZuluNewsClassificationV2(AbsTaskClassification):
46
46
  metadata = TaskMetadata(
47
47
  name="IsiZuluNewsClassification.v2",
48
- description="""isiZulu News Classification Dataset
49
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
48
+ description="isiZulu News Classification Dataset This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
50
49
  reference="https://huggingface.co/datasets/dsfsi/za-isizulu-siswati-news",
51
50
  dataset={
52
51
  "path": "mteb/isi_zulu_news",
@@ -8,9 +8,7 @@ class MewsC16JaClustering(AbsTaskClustering):
8
8
 
9
9
  metadata = TaskMetadata(
10
10
  name="MewsC16JaClustering",
11
- description="""MewsC-16 (Multilingual Short Text Clustering Dataset for News in 16 languages) is constructed from Wikinews.
12
- This dataset is the Japanese split of MewsC-16, containing topic sentences from Wikinews articles in 12 categories.
13
- More detailed information is available in the Appendix E of the citation.""",
11
+ description="MewsC-16 (Multilingual Short Text Clustering Dataset for News in 16 languages) is constructed from Wikinews. This dataset is the Japanese split of MewsC-16, containing topic sentences from Wikinews articles in 12 categories. More detailed information is available in the Appendix E of the citation.",
14
12
  reference="https://github.com/sbintuitions/JMTEB",
15
13
  dataset={
16
14
  "path": "mteb/MewsC16JaClustering",
@@ -210,12 +210,7 @@ class SIB200ClusteringFast(AbsTaskClustering):
210
210
 
211
211
  metadata = TaskMetadata(
212
212
  name="SIB200ClusteringS2S",
213
- description="""SIB-200 is the largest publicly available topic classification
214
- dataset based on Flores-200 covering 205 languages and dialects annotated. The dataset is
215
- annotated in English for the topics, science/technology, travel, politics, sports,
216
- health, entertainment, and geography. The labels are then transferred to the other languages
217
- in Flores-200 which are human-translated.
218
- """,
213
+ description="SIB-200 is the largest publicly available topic classification dataset based on Flores-200 covering 205 languages and dialects annotated. The dataset is annotated in English for the topics, science/technology, travel, politics, sports, health, entertainment, and geography. The labels are then transferred to the other languages in Flores-200 which are human-translated.",
219
214
  reference="https://arxiv.org/abs/2309.07445",
220
215
  dataset={
221
216
  "path": "mteb/sib200",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class RedditClusteringP2PVN(AbsTaskClusteringLegacy):
6
6
  metadata = TaskMetadata(
7
7
  name="RedditClusteringP2P-VN",
8
- description="""A translated dataset from Clustering of title+posts from reddit. Clustering of 10 sets of 50k paragraphs and 40 sets of 10k paragraphs.
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from Clustering of title+posts from reddit. Clustering of 10 sets of 50k paragraphs and 40 sets of 10k paragraphs. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="https://arxiv.org/abs/2104.07081",
14
10
  dataset={
15
11
  "path": "GreenNode/reddit-clustering-p2p-vn",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class RedditClusteringVN(AbsTaskClusteringLegacy):
6
6
  metadata = TaskMetadata(
7
7
  name="RedditClustering-VN",
8
- description="""A translated dataset from Clustering of titles from 199 subreddits. Clustering of 25 sets, each with 10-50 classes, and each class with 100 - 1000 sentences.
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from Clustering of titles from 199 subreddits. Clustering of 25 sets, each with 10-50 classes, and each class with 100 - 1000 sentences. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="https://arxiv.org/abs/2104.07081",
14
10
  dataset={
15
11
  "path": "GreenNode/reddit-clustering-vn",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class StackExchangeClusteringP2PVN(AbsTaskClusteringLegacy):
6
6
  metadata = TaskMetadata(
7
7
  name="StackExchangeClusteringP2P-VN",
8
- description="""A translated Clustering of title+body from stackexchange. Clustering of 5 sets of 10k paragraphs and 5 sets of 5k paragraphs.
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated Clustering of title+body from stackexchange. Clustering of 5 sets of 10k paragraphs and 5 sets of 5k paragraphs. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="https://arxiv.org/abs/2104.07081",
14
10
  dataset={
15
11
  "path": "GreenNode/stackexchange-clustering-p2p-vn",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class StackExchangeClusteringVN(AbsTaskClusteringLegacy):
6
6
  metadata = TaskMetadata(
7
7
  name="StackExchangeClustering-VN",
8
- description="""A translated dataset from Clustering of titles from 121 stackexchanges. Clustering of 25 sets, each with 10-50 classes, and each class with 100 - 1000 sentences.
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from Clustering of titles from 121 stackexchanges. Clustering of 25 sets, each with 10-50 classes, and each class with 100 - 1000 sentences. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="https://arxiv.org/abs/2104.07081",
14
10
  dataset={
15
11
  "path": "GreenNode/stackexchange-clustering-vn",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class TwentyNewsgroupsClusteringVN(AbsTaskClusteringLegacy):
6
6
  metadata = TaskMetadata(
7
7
  name="TwentyNewsgroupsClustering-VN",
8
- description="""A translated dataset from Clustering of the 20 Newsgroups dataset (subject only).
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from Clustering of the 20 Newsgroups dataset (subject only). The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="https://scikit-learn.org/0.19/datasets/twenty_newsgroups.html",
14
10
  dataset={
15
11
  "path": "GreenNode/twentynewsgroups-clustering-vn",
@@ -7,11 +7,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
7
7
  class EmitClassification(AbsTaskMultilabelClassification):
8
8
  metadata = TaskMetadata(
9
9
  name="EmitClassification",
10
- description="""The EMit dataset is a comprehensive resource for the detection of emotions in Italian social media texts.
11
- The EMit dataset consists of social media messages about TV shows, TV series, music videos, and advertisements.
12
- Each message is annotated with one or more of the 8 primary emotions defined by Plutchik
13
- (anger, anticipation, disgust, fear, joy, sadness, surprise, trust), as well as an additional label “love.”
14
- """,
10
+ description="The EMit dataset is a comprehensive resource for the detection of emotions in Italian social media texts. The EMit dataset consists of social media messages about TV shows, TV series, music videos, and advertisements. Each message is annotated with one or more of the 8 primary emotions defined by Plutchik (anger, anticipation, disgust, fear, joy, sadness, surprise, trust), as well as an additional label “love.”",
15
11
  reference="https://github.com/oaraque/emit",
16
12
  dataset={
17
13
  "path": "MattiaSangermano/emit",
@@ -7,15 +7,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
7
7
  class KorHateSpeechMLClassification(AbsTaskMultilabelClassification):
8
8
  metadata = TaskMetadata(
9
9
  name="KorHateSpeechMLClassification",
10
- description="""
11
- The Korean Multi-label Hate Speech Dataset, K-MHaS, consists of 109,692 utterances from Korean online news comments,
12
- labelled with 8 fine-grained hate speech classes (labels: Politics, Origin, Physical, Age, Gender, Religion, Race, Profanity)
13
- or Not Hate Speech class. Each utterance provides from a single to four labels that can handles Korean language patterns effectively.
14
- For more details, please refer to the paper about K-MHaS, published at COLING 2022.
15
- This dataset is based on the Korean online news comments available on Kaggle and Github.
16
- The unlabeled raw data was collected between January 2018 and June 2020.
17
- The language producers are users who left the comments on the Korean online news platform between 2018 and 2020.
18
- """,
10
+ description="The Korean Multi-label Hate Speech Dataset, K-MHaS, consists of 109,692 utterances from Korean online news comments, labelled with 8 fine-grained hate speech classes (labels: Politics, Origin, Physical, Age, Gender, Religion, Race, Profanity) or Not Hate Speech class. Each utterance provides from a single to four labels that can handles Korean language patterns effectively. For more details, please refer to the paper about K-MHaS, published at COLING 2022. This dataset is based on the Korean online news comments available on Kaggle and Github. The unlabeled raw data was collected between January 2018 and June 2020. The language producers are users who left the comments on the Korean online news platform between 2018 and 2020.",
19
11
  dataset={
20
12
  "path": "mteb/KorHateSpeechMLClassification",
21
13
  "revision": "47cd2e61b64f2f11ccb006a579cda71318c6de9b",
@@ -7,12 +7,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
7
7
  class MalteseNewsClassification(AbsTaskMultilabelClassification):
8
8
  metadata = TaskMetadata(
9
9
  name="MalteseNewsClassification",
10
- description="""A multi-label topic classification dataset for Maltese News
11
- Articles. The data was collected from the press_mt subset from Korpus
12
- Malti v4.0. Article contents were cleaned to filter out JavaScript, CSS,
13
- & repeated non-Maltese sub-headings. The labels are based on the category
14
- field from this corpus.
15
- """,
10
+ description="A multi-label topic classification dataset for Maltese News Articles. The data was collected from the press_mt subset from Korpus Malti v4.0. Article contents were cleaned to filter out JavaScript, CSS, & repeated non-Maltese sub-headings. The labels are based on the category field from this corpus.",
16
11
  reference="https://huggingface.co/datasets/MLRS/maltese_news_categories",
17
12
  dataset={
18
13
  "path": "MLRS/maltese_news_categories",
@@ -7,12 +7,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
7
7
  class BrazilianToxicTweetsClassification(AbsTaskMultilabelClassification):
8
8
  metadata = TaskMetadata(
9
9
  name="BrazilianToxicTweetsClassification",
10
- description="""
11
- ToLD-Br is the biggest dataset for toxic tweets in Brazilian Portuguese, crowdsourced by 42 annotators selected from
12
- a pool of 129 volunteers. Annotators were selected aiming to create a plural group in terms of demographics (ethnicity,
13
- sexual orientation, age, gender). Each tweet was labeled by three annotators in 6 possible categories: LGBTQ+phobia,
14
- Xenophobia, Obscene, Insult, Misogyny and Racism.
15
- """,
10
+ description="ToLD-Br is the biggest dataset for toxic tweets in Brazilian Portuguese, crowdsourced by 42 annotators selected from a pool of 129 volunteers. Annotators were selected aiming to create a plural group in terms of demographics (ethnicity, sexual orientation, age, gender). Each tweet was labeled by three annotators in 6 possible categories: LGBTQ+phobia, Xenophobia, Obscene, Insult, Misogyny and Racism.",
16
11
  reference="https://paperswithcode.com/dataset/told-br",
17
12
  dataset={
18
13
  "path": "mteb/BrazilianToxicTweetsClassification",
@@ -7,7 +7,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
7
7
  class SwedishPatentCPCGroupClassification(AbsTaskMultilabelClassification):
8
8
  metadata = TaskMetadata(
9
9
  name="SwedishPatentCPCGroupClassification",
10
- description="""This dataset contains historical Swedish patent documents (1885-1972) classified according to the Cooperative Patent Classification (CPC) system at the group level. Each document can have multiple labels, making this a challenging multi-label classification task with significant class imbalance and data sparsity characteristics. The dataset includes patent claims text extracted from digitally recreated versions of historical Swedish patents, generated using Optical Character Recognition (OCR) from original paper documents. The text quality varies due to OCR limitations, but all CPC labels were manually assigned by patent engineers at PRV (Swedish Patent and Registration Office), ensuring high reliability for machine learning applications.""",
10
+ description="This dataset contains historical Swedish patent documents (1885-1972) classified according to the Cooperative Patent Classification (CPC) system at the group level. Each document can have multiple labels, making this a challenging multi-label classification task with significant class imbalance and data sparsity characteristics. The dataset includes patent claims text extracted from digitally recreated versions of historical Swedish patents, generated using Optical Character Recognition (OCR) from original paper documents. The text quality varies due to OCR limitations, but all CPC labels were manually assigned by patent engineers at PRV (Swedish Patent and Registration Office), ensuring high reliability for machine learning applications.",
11
11
  reference="https://urn.kb.se/resolve?urn=urn:nbn:se:kth:diva-368254",
12
12
  type="MultilabelClassification",
13
13
  category="t2t",
@@ -7,8 +7,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
7
7
  class SwedishPatentCPCSubclassClassification(AbsTaskMultilabelClassification):
8
8
  metadata = TaskMetadata(
9
9
  name="SwedishPatentCPCSubclassClassification",
10
- description="""This dataset contains historical Swedish patent documents (1885-1972) classified according to the Cooperative Patent Classification (CPC) system. Each document can have multiple labels, making this a multi-label classification task with significant implications for patent retrieval and prior art search.
11
- The dataset includes patent claims text extracted from digitally recreated versions of historical Swedish patents, generated using Optical Character Recognition (OCR) from original paper documents. The text quality varies due to OCR limitations, but all CPC labels were manually assigned by patent engineers at PRV (Swedish Patent and Registration Office), ensuring high reliability for machine learning applications.""",
10
+ description="This dataset contains historical Swedish patent documents (1885-1972) classified according to the Cooperative Patent Classification (CPC) system. Each document can have multiple labels, making this a multi-label classification task with significant implications for patent retrieval and prior art search. The dataset includes patent claims text extracted from digitally recreated versions of historical Swedish patents, generated using Optical Character Recognition (OCR) from original paper documents. The text quality varies due to OCR limitations, but all CPC labels were manually assigned by patent engineers at PRV (Swedish Patent and Registration Office), ensuring high reliability for machine learning applications.",
12
11
  reference="https://urn.kb.se/resolve?urn=urn:nbn:se:kth:diva-368254",
13
12
  type="MultilabelClassification",
14
13
  category="t2t",
@@ -5,12 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class TalemaaderPC(AbsTaskPairClassification):
6
6
  metadata = TaskMetadata(
7
7
  name="TalemaaderPC",
8
- description="""\
9
- The Danish Language and Literature Society has developed a dataset for evaluating language models in Danish.
10
- The dataset contains a total of 1000 Danish idioms and fixed expressions with transferred meanings based on the Danish Dictionary's collection of fixed expressions with associated definitions.
11
- For each of the 1000 idioms and fixed expressions, three false definitions have also been prepared.
12
- The dataset can be used to test the performance of language models in identifying correct definitions for Danish idioms and fixed expressions.
13
- """,
8
+ description="\\ The Danish Language and Literature Society has developed a dataset for evaluating language models in Danish. The dataset contains a total of 1000 Danish idioms and fixed expressions with transferred meanings based on the Danish Dictionary's collection of fixed expressions with associated definitions. For each of the 1000 idioms and fixed expressions, three false definitions have also been prepared. The dataset can be used to test the performance of language models in identifying correct definitions for Danish idioms and fixed expressions.",
14
9
  reference="https://sprogteknologi.dk/dataset/1000-talemader-evalueringsdatasaet",
15
10
  dataset={
16
11
  "path": "mteb/talemaader_pc",
@@ -50,15 +50,7 @@ _DATASET_COLUMN_MAP = [
50
50
  class LegalBenchPC(AbsTaskPairClassification):
51
51
  metadata = TaskMetadata(
52
52
  name="LegalBenchPC",
53
- description="""This LegalBench pair classification task is a combination of the following datasets:
54
-
55
- - Citation Prediction Classification: Given a legal statement and a case citation, determine if the citation is supportive of the legal statement.
56
- - Consumer Contracts QA: The task consists of 400 yes/no questions relating to consumer contracts (specifically, online terms of service) and is relevant to the legal skill of contract interpretation.
57
- - Contract QA: Answer yes/no questions about whether contractual clauses discuss particular issues like confidentiality requirements, BIPA consent, PII data breaches, breach of contract etc.
58
- - Hearsay: Classify if a particular piece of evidence qualifies as hearsay. Each sample in the dataset describes (1) an issue being litigated or an assertion a party wishes to prove, and (2) a piece of evidence a party wishes to introduce. The goal is to determine if—as it relates to the issue—the evidence would be considered hearsay under the definition provided above.
59
- - Privacy Policy Entailment: Given a privacy policy clause and a description of the clause, determine if the description is correct. This is a binary classification task in which the LLM is provided with a clause from a privacy policy, and a description of that clause (e.g., “The policy describes collection of the user’s HTTP cookies, flash cookies, pixel tags, or similar identifiers by a party to the contract.”).
60
- - Privacy Policy QA: Given a question and a clause from a privacy policy, determine if the clause contains enough information to answer the question. This is a binary classification task in which the LLM is provided with a question (e.g., “do you publish my data”) and a clause from a privacy policy. The LLM must determine if the clause contains an answer to the question, and classify the question-clause pair.
61
- """,
53
+ description="This LegalBench pair classification task is a combination of the following datasets: - Citation Prediction Classification: Given a legal statement and a case citation, determine if the citation is supportive of the legal statement. - Consumer Contracts QA: The task consists of 400 yes/no questions relating to consumer contracts (specifically, online terms of service) and is relevant to the legal skill of contract interpretation. - Contract QA: Answer yes/no questions about whether contractual clauses discuss particular issues like confidentiality requirements, BIPA consent, PII data breaches, breach of contract etc. - Hearsay: Classify if a particular piece of evidence qualifies as hearsay. Each sample in the dataset describes (1) an issue being litigated or an assertion a party wishes to prove, and (2) a piece of evidence a party wishes to introduce. The goal is to determine if—as it relates to the issue—the evidence would be considered hearsay under the definition provided above. - Privacy Policy Entailment: Given a privacy policy clause and a description of the clause, determine if the description is correct. This is a binary classification task in which the LLM is provided with a clause from a privacy policy, and a description of that clause (e.g., “The policy describes collection of the user’s HTTP cookies, flash cookies, pixel tags, or similar identifiers by a party to the contract.”). - Privacy Policy QA: Given a question and a clause from a privacy policy, determine if the clause contains enough information to answer the question. This is a binary classification task in which the LLM is provided with a question (e.g., “do you publish my data”) and a clause from a privacy policy. The LLM must determine if the clause contains an answer to the question, and classify the question-clause pair.",
62
54
  reference="https://huggingface.co/datasets/nguha/legalbench",
63
55
  dataset={
64
56
  "path": "mteb/LegalBenchPC",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class SprintDuplicateQuestionsPCVN(AbsTaskPairClassification):
6
6
  metadata = TaskMetadata(
7
7
  name="SprintDuplicateQuestions-VN",
8
- description="""A translated dataset from Duplicate questions from the Sprint community.
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from Duplicate questions from the Sprint community. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="https://www.aclweb.org/anthology/D18-1131/",
14
10
  dataset={
15
11
  "path": "GreenNode/sprintduplicatequestions-pairclassification-vn",
@@ -9,11 +9,7 @@ class TwitterSemEval2015PCVN(AbsTaskPairClassification):
9
9
  "path": "GreenNode/twittersemeval2015-pairclassification-vn",
10
10
  "revision": "9215a3c954078fd15c2bbecca914477d53944de1",
11
11
  },
12
- description="""A translated dataset from Paraphrase-Pairs of Tweets from the SemEval 2015 workshop.
13
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
14
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
15
- - Applies advanced embedding models to filter the translations.
16
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
12
+ description="A translated dataset from Paraphrase-Pairs of Tweets from the SemEval 2015 workshop. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
17
13
  reference="https://alt.qcri.org/semeval2015/task1/",
18
14
  category="t2c",
19
15
  type="PairClassification",
@@ -9,11 +9,7 @@ class TwitterURLCorpusPC(AbsTaskPairClassification):
9
9
  "path": "GreenNode/twitterurlcorpus-pairclassification-vn",
10
10
  "revision": "6e6a40aaade2129f70432f2156a6d24b63d72be3",
11
11
  },
12
- description="""A translated dataset from Paraphrase-Pairs of Tweets.
13
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
14
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
15
- - Applies advanced embedding models to filter the translations.
16
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
12
+ description="A translated dataset from Paraphrase-Pairs of Tweets. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
17
13
  reference="https://languagenet.github.io/",
18
14
  category="t2c",
19
15
  type="PairClassification",
@@ -5,9 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class RuSciBenchCitedCountRegression(AbsTaskRegression):
6
6
  metadata = TaskMetadata(
7
7
  name="RuSciBenchCitedCountRegression",
8
- description="""Predicts the number of times a scientific article has been cited by other papers.
9
- The prediction is based on the article's title and abstract. The data is sourced from the Russian electronic
10
- library of scientific publications (eLibrary.ru) and includes papers with both Russian and English abstracts.""",
8
+ description="Predicts the number of times a scientific article has been cited by other papers. The prediction is based on the article's title and abstract. The data is sourced from the Russian electronic library of scientific publications (eLibrary.ru) and includes papers with both Russian and English abstracts.",
11
9
  reference="https://github.com/mlsa-iai-msu-lab/ru_sci_bench_mteb",
12
10
  dataset={
13
11
  "path": "mlsa-iai-msu-lab/ru_sci_bench_mteb",
@@ -51,9 +49,7 @@ class RuSciBenchCitedCountRegression(AbsTaskRegression):
51
49
  class RuSciBenchYearPublRegression(AbsTaskRegression):
52
50
  metadata = TaskMetadata(
53
51
  name="RuSciBenchYearPublRegression",
54
- description="""Predicts the publication year of a scientific article. The prediction is based on the
55
- article's title and abstract. The data is sourced from the Russian electronic library of scientific
56
- publications (eLibrary.ru) and includes papers with both Russian and English abstracts.""",
52
+ description="Predicts the publication year of a scientific article. The prediction is based on the article's title and abstract. The data is sourced from the Russian electronic library of scientific publications (eLibrary.ru) and includes papers with both Russian and English abstracts.",
57
53
  reference="https://github.com/mlsa-iai-msu-lab/ru_sci_bench_mteb",
58
54
  dataset={
59
55
  "path": "mlsa-iai-msu-lab/ru_sci_bench_mteb",
@@ -78,8 +78,7 @@ _CITATION = r"""
78
78
  class XGlueWPRReranking(AbsTaskRetrieval):
79
79
  metadata = TaskMetadata(
80
80
  name="XGlueWPRReranking",
81
- description="""XGLUE is a new benchmark dataset to evaluate the performance of cross-lingual pre-trained models
82
- with respect to cross-lingual natural language understanding and generation. XGLUE is composed of 11 tasks spans 19 languages.""",
81
+ description="XGLUE is a new benchmark dataset to evaluate the performance of cross-lingual pre-trained models with respect to cross-lingual natural language understanding and generation. XGLUE is composed of 11 tasks spans 19 languages.",
83
82
  reference="https://github.com/microsoft/XGLUE",
84
83
  dataset={
85
84
  "path": "mteb/XGlueWPRReranking",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
5
5
  class AskUbuntuDupQuestionsVN(AbsTaskRetrieval):
6
6
  metadata = TaskMetadata(
7
7
  name="AskUbuntuDupQuestions-VN",
8
- description="""A translated dataset from AskUbuntu Question Dataset - Questions from AskUbuntu with manual annotations marking pairs of questions as similar or non-similar
9
- The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
10
- - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
11
- - Applies advanced embedding models to filter the translations.
12
- - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
8
+ description="A translated dataset from AskUbuntu Question Dataset - Questions from AskUbuntu with manual annotations marking pairs of questions as similar or non-similar The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
13
9
  reference="https://github.com/taolei87/askubuntu",
14
10
  dataset={
15
11
  "path": "mteb/AskUbuntuDupQuestions-VN",