mteb 2.1.4__py3-none-any.whl → 2.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/__init__.py +6 -0
- mteb/_create_dataloaders.py +22 -20
- mteb/_evaluators/any_sts_evaluator.py +23 -14
- mteb/_evaluators/classification_metrics.py +54 -0
- mteb/_evaluators/clustering_evaluator.py +3 -3
- mteb/_evaluators/evaluator.py +4 -2
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +18 -11
- mteb/_evaluators/pair_classification_evaluator.py +34 -40
- mteb/_evaluators/retrieval_evaluator.py +2 -2
- mteb/_evaluators/retrieval_metrics.py +18 -17
- mteb/_evaluators/sklearn_evaluator.py +25 -37
- mteb/_evaluators/text/bitext_mining_evaluator.py +31 -19
- mteb/_evaluators/text/summarization_evaluator.py +27 -20
- mteb/_evaluators/zeroshot_classification_evaluator.py +7 -5
- mteb/abstasks/_data_filter/__init__.py +0 -0
- mteb/abstasks/_data_filter/filters.py +125 -0
- mteb/abstasks/_data_filter/task_pipelines.py +105 -0
- mteb/abstasks/_statistics_calculation.py +23 -11
- mteb/abstasks/_stratification.py +18 -18
- mteb/abstasks/abstask.py +35 -28
- mteb/abstasks/aggregate_task_metadata.py +1 -9
- mteb/abstasks/aggregated_task.py +10 -29
- mteb/abstasks/classification.py +15 -12
- mteb/abstasks/clustering.py +20 -16
- mteb/abstasks/clustering_legacy.py +13 -10
- mteb/abstasks/image/image_text_pair_classification.py +7 -4
- mteb/abstasks/multilabel_classification.py +33 -22
- mteb/abstasks/pair_classification.py +27 -11
- mteb/abstasks/regression.py +4 -4
- mteb/abstasks/retrieval.py +28 -24
- mteb/abstasks/retrieval_dataset_loaders.py +2 -2
- mteb/abstasks/sts.py +14 -4
- mteb/abstasks/task_metadata.py +32 -33
- mteb/abstasks/text/bitext_mining.py +39 -28
- mteb/abstasks/text/reranking.py +8 -6
- mteb/abstasks/text/summarization.py +10 -5
- mteb/abstasks/zeroshot_classification.py +8 -4
- mteb/benchmarks/_create_table.py +84 -37
- mteb/benchmarks/benchmark.py +77 -16
- mteb/benchmarks/benchmarks/__init__.py +12 -0
- mteb/benchmarks/benchmarks/benchmarks.py +361 -16
- mteb/benchmarks/get_benchmark.py +14 -53
- mteb/cache.py +227 -37
- mteb/cli/_display_tasks.py +2 -2
- mteb/cli/build_cli.py +110 -14
- mteb/cli/generate_model_card.py +43 -23
- mteb/deprecated_evaluator.py +71 -62
- mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
- mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
- mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
- mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
- mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
- mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
- mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
- mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
- mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
- mteb/evaluate.py +106 -75
- mteb/filter_tasks.py +25 -26
- mteb/get_tasks.py +29 -30
- mteb/languages/language_scripts.py +5 -3
- mteb/leaderboard/app.py +414 -151
- mteb/leaderboard/benchmark_selector.py +14 -5
- mteb/leaderboard/figures.py +13 -15
- mteb/leaderboard/table.py +82 -17
- mteb/load_results.py +12 -12
- mteb/models/__init__.py +4 -1
- mteb/models/abs_encoder.py +31 -23
- mteb/models/cache_wrappers/__init__.py +2 -1
- mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +7 -6
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
- mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
- mteb/models/cache_wrappers/cache_wrapper.py +3 -3
- mteb/models/get_model_meta.py +25 -118
- mteb/models/instruct_wrapper.py +33 -9
- mteb/models/model_implementations/align_models.py +8 -1
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +65 -0
- mteb/models/model_implementations/ara_models.py +9 -1
- mteb/models/model_implementations/arctic_models.py +16 -8
- mteb/models/model_implementations/b1ade_models.py +2 -1
- mteb/models/model_implementations/bedrock_models.py +4 -0
- mteb/models/model_implementations/bge_models.py +101 -17
- mteb/models/model_implementations/bica_model.py +35 -0
- mteb/models/model_implementations/blip2_models.py +13 -2
- mteb/models/model_implementations/blip_models.py +43 -16
- mteb/models/model_implementations/bm25.py +5 -4
- mteb/models/model_implementations/bmretriever_models.py +10 -4
- mteb/models/model_implementations/cadet_models.py +10 -1
- mteb/models/model_implementations/cde_models.py +25 -4
- mteb/models/model_implementations/clip_models.py +9 -6
- mteb/models/model_implementations/clips_models.py +100 -0
- mteb/models/model_implementations/codefuse_models.py +165 -3
- mteb/models/model_implementations/codesage_models.py +18 -3
- mteb/models/model_implementations/cohere_models.py +13 -6
- mteb/models/model_implementations/cohere_v.py +7 -2
- mteb/models/model_implementations/colpali_models.py +17 -9
- mteb/models/model_implementations/colqwen_models.py +275 -5
- mteb/models/model_implementations/colsmol_models.py +4 -2
- mteb/models/model_implementations/conan_models.py +2 -1
- mteb/models/model_implementations/dino_models.py +194 -23
- mteb/models/model_implementations/e5_instruct.py +27 -4
- mteb/models/model_implementations/e5_models.py +21 -110
- mteb/models/model_implementations/e5_v.py +7 -6
- mteb/models/model_implementations/eagerworks_models.py +164 -0
- mteb/models/model_implementations/emillykkejensen_models.py +91 -0
- mteb/models/model_implementations/en_code_retriever.py +2 -1
- mteb/models/model_implementations/euler_models.py +32 -0
- mteb/models/model_implementations/evaclip_models.py +4 -0
- mteb/models/model_implementations/fa_models.py +67 -9
- mteb/models/model_implementations/facebookai.py +205 -0
- mteb/models/model_implementations/geogpt_models.py +2 -1
- mteb/models/model_implementations/gme_v_models.py +17 -10
- mteb/models/model_implementations/google_models.py +17 -6
- mteb/models/model_implementations/granite_vision_embedding_models.py +8 -3
- mteb/models/model_implementations/gritlm_models.py +4 -2
- mteb/models/model_implementations/gte_models.py +99 -9
- mteb/models/model_implementations/hinvec_models.py +2 -1
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +36 -6
- mteb/models/model_implementations/inf_models.py +4 -2
- mteb/models/model_implementations/jasper_models.py +256 -3
- mteb/models/model_implementations/jina_clip.py +49 -10
- mteb/models/model_implementations/jina_models.py +222 -11
- mteb/models/model_implementations/kalm_models.py +203 -25
- mteb/models/model_implementations/kblab.py +37 -0
- mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
- mteb/models/model_implementations/kfst.py +25 -0
- mteb/models/model_implementations/kowshik24_models.py +32 -0
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +2 -1
- mteb/models/model_implementations/linq_models.py +4 -3
- mteb/models/model_implementations/listconranker.py +2 -2
- mteb/models/model_implementations/llm2clip_models.py +9 -6
- mteb/models/model_implementations/llm2vec_models.py +16 -8
- mteb/models/model_implementations/mcinext_models.py +7 -1
- mteb/models/model_implementations/mdbr_models.py +19 -3
- mteb/models/model_implementations/misc_models.py +422 -60
- mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
- mteb/models/model_implementations/mme5_models.py +2 -1
- mteb/models/model_implementations/moco_models.py +15 -4
- mteb/models/model_implementations/mod_models.py +191 -0
- mteb/models/model_implementations/model2vec_models.py +27 -14
- mteb/models/model_implementations/moka_models.py +4 -1
- mteb/models/model_implementations/nbailab.py +70 -0
- mteb/models/model_implementations/no_instruct_sentence_models.py +3 -2
- mteb/models/model_implementations/nomic_models.py +173 -6
- mteb/models/model_implementations/nomic_models_vision.py +8 -3
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +32 -19
- mteb/models/model_implementations/nvidia_models.py +155 -20
- mteb/models/model_implementations/octen_models.py +254 -0
- mteb/models/model_implementations/openai_models.py +20 -16
- mteb/models/model_implementations/openclip_models.py +37 -13
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -5
- mteb/models/model_implementations/ops_moa_models.py +5 -3
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
- mteb/models/model_implementations/pawan_models.py +39 -0
- mteb/models/model_implementations/piccolo_models.py +9 -1
- mteb/models/model_implementations/pixie_models.py +56 -0
- mteb/models/model_implementations/promptriever_models.py +12 -8
- mteb/models/model_implementations/pylate_models.py +46 -12
- mteb/models/model_implementations/qodo_models.py +4 -2
- mteb/models/model_implementations/qtack_models.py +2 -1
- mteb/models/model_implementations/qwen3_models.py +9 -6
- mteb/models/model_implementations/qzhou_models.py +5 -3
- mteb/models/model_implementations/random_baseline.py +19 -24
- mteb/models/model_implementations/rasgaard_models.py +34 -0
- mteb/models/model_implementations/reasonir_model.py +2 -1
- mteb/models/model_implementations/repllama_models.py +5 -3
- mteb/models/model_implementations/rerankers_custom.py +15 -9
- mteb/models/model_implementations/rerankers_monot5_based.py +31 -31
- mteb/models/model_implementations/richinfoai_models.py +2 -1
- mteb/models/model_implementations/ru_sentence_models.py +71 -20
- mteb/models/model_implementations/ruri_models.py +322 -0
- mteb/models/model_implementations/salesforce_models.py +6 -3
- mteb/models/model_implementations/samilpwc_models.py +2 -1
- mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
- mteb/models/model_implementations/searchmap_models.py +2 -1
- mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +625 -0
- mteb/models/model_implementations/seed_models.py +1 -0
- mteb/models/model_implementations/sentence_transformers_models.py +177 -18
- mteb/models/model_implementations/shuu_model.py +32 -31
- mteb/models/model_implementations/siglip_models.py +30 -20
- mteb/models/model_implementations/slm_models.py +416 -0
- mteb/models/model_implementations/sonar_models.py +1 -0
- mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
- mteb/models/model_implementations/stella_models.py +23 -4
- mteb/models/model_implementations/tarka_models.py +376 -0
- mteb/models/model_implementations/text2vec_models.py +9 -3
- mteb/models/model_implementations/ua_sentence_models.py +11 -1
- mteb/models/model_implementations/uae_models.py +8 -1
- mteb/models/model_implementations/vdr_models.py +3 -1
- mteb/models/model_implementations/vi_vn_models.py +45 -6
- mteb/models/model_implementations/vista_models.py +2 -0
- mteb/models/model_implementations/vlm2vec_models.py +5 -3
- mteb/models/model_implementations/voyage_models.py +99 -0
- mteb/models/model_implementations/voyage_v.py +17 -9
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +2 -1
- mteb/models/model_implementations/yuan_models.py +34 -0
- mteb/models/model_implementations/yuan_models_en.py +58 -0
- mteb/models/model_meta.py +498 -29
- mteb/models/models_protocols.py +22 -6
- mteb/models/search_encoder_index/__init__.py +7 -0
- mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
- mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +160 -0
- mteb/models/search_wrappers.py +197 -65
- mteb/models/sentence_transformer_wrapper.py +52 -32
- mteb/models/vllm_wrapper.py +327 -0
- mteb/py.typed +0 -0
- mteb/results/benchmark_results.py +114 -65
- mteb/results/model_result.py +63 -26
- mteb/results/task_result.py +117 -77
- mteb/similarity_functions.py +60 -7
- mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
- mteb/tasks/classification/ara/ajgt.py +1 -2
- mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
- mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
- mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
- mteb/tasks/classification/dan/dk_hate_classification.py +2 -3
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
- mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
- mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
- mteb/tasks/classification/eng/arxiv_classification.py +1 -2
- mteb/tasks/classification/eng/banking77_classification.py +1 -2
- mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
- mteb/tasks/classification/eng/emotion_classification.py +1 -2
- mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
- mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
- mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
- mteb/tasks/classification/eng/imdb_classification.py +1 -2
- mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
- mteb/tasks/classification/eng/news_classification.py +1 -2
- mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
- mteb/tasks/classification/eng/patent_classification.py +1 -2
- mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
- mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
- mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
- mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
- mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
- mteb/tasks/classification/eng/ucf101_classification.py +1 -5
- mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
- mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
- mteb/tasks/classification/est/estonian_valence.py +2 -3
- mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
- mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
- mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
- mteb/tasks/classification/fra/french_book_reviews.py +1 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
- mteb/tasks/classification/heb/__init__.py +6 -1
- mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
- mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
- mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
- mteb/tasks/classification/jpn/wrime_classification.py +1 -2
- mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
- mteb/tasks/classification/kor/klue_tc.py +1 -2
- mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +3 -4
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
- mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
- mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
- mteb/tasks/classification/multilingual/scala_classification.py +2 -3
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
- mteb/tasks/classification/mya/myanmar_news.py +1 -2
- mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
- mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
- mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
- mteb/tasks/classification/nld/iconclass_classification.py +3 -0
- mteb/tasks/classification/nld/open_tender_classification.py +3 -0
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
- mteb/tasks/classification/nob/no_rec_classification.py +1 -2
- mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
- mteb/tasks/classification/ory/odia_news_classification.py +1 -2
- mteb/tasks/classification/pol/polish_classification.py +3 -6
- mteb/tasks/classification/ron/moroco.py +1 -2
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
- mteb/tasks/classification/rus/georeview_classification.py +1 -2
- mteb/tasks/classification/rus/headline_classification.py +1 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
- mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
- mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
- mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
- mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
- mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
- mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
- mteb/tasks/classification/swe/dalaj_classification.py +1 -2
- mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
- mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
- mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
- mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
- mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
- mteb/tasks/classification/tur/__init__.py +4 -0
- mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
- mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
- mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
- mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
- mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
- mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
- mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
- mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
- mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
- mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
- mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
- mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
- mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
- mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
- mteb/tasks/classification/zho/cmteb_classification.py +5 -10
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
- mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
- mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
- mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
- mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
- mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
- mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
- mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
- mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
- mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
- mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
- mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
- mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
- mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
- mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
- mteb/tasks/pair_classification/rus/__init__.py +2 -2
- mteb/tasks/pair_classification/rus/terra.py +51 -25
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
- mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
- mteb/tasks/reranking/jpn/__init__.py +9 -1
- mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
- mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
- mteb/tasks/reranking/multilingual/__init__.py +2 -0
- mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
- mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
- mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
- mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
- mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
- mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
- mteb/tasks/retrieval/code/code_rag.py +12 -12
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
- mteb/tasks/retrieval/eng/__init__.py +2 -0
- mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
- mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
- mteb/tasks/retrieval/jpn/__init__.py +8 -0
- mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
- mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
- mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
- mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
- mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
- mteb/tasks/retrieval/kor/__init__.py +16 -1
- mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
- mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
- mteb/tasks/retrieval/multilingual/__init__.py +24 -0
- mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
- mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
- mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
- mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
- mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +389 -0
- mteb/tasks/retrieval/nld/__init__.py +8 -4
- mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
- mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
- mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
- mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
- mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
- mteb/tasks/retrieval/nob/norquad.py +2 -2
- mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
- mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
- mteb/tasks/retrieval/vie/__init__.py +14 -6
- mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +40 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +40 -5
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +40 -7
- mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +40 -6
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +49 -5
- mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +40 -5
- mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
- mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
- mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
- mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
- mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
- mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
- mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
- mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
- mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
- mteb/types/__init__.py +2 -0
- mteb/types/_encoder_io.py +19 -2
- mteb/types/_result.py +2 -1
- mteb/types/statistics.py +9 -3
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/METADATA +25 -8
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/RECORD +525 -438
- mteb/models/model_implementations/mxbai_models.py +0 -102
- mteb/models/model_implementations/nb_sbert.py +0 -25
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
|
@@ -5,13 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
|
|
|
5
5
|
class UkrFormalityClassification(AbsTaskClassification):
|
|
6
6
|
metadata = TaskMetadata(
|
|
7
7
|
name="UkrFormalityClassification",
|
|
8
|
-
description="""
|
|
9
|
-
This dataset contains Ukrainian Formality Classification dataset obtained by
|
|
10
|
-
trainslating English GYAFC data.
|
|
11
|
-
English data source: https://aclanthology.org/N18-1012/
|
|
12
|
-
Translation into Ukrainian language using model: https://huggingface.co/facebook/nllb-200-distilled-600M
|
|
13
|
-
Additionally, the dataset was balanced, with labels: 0 - informal, 1 - formal.
|
|
14
|
-
""",
|
|
8
|
+
description="This dataset contains Ukrainian Formality Classification dataset obtained by trainslating English GYAFC data. English data source: https://aclanthology.org/N18-1012/ Translation into Ukrainian language using model: https://huggingface.co/facebook/nllb-200-distilled-600M Additionally, the dataset was balanced, with labels: 0 - informal, 1 - formal.",
|
|
15
9
|
dataset={
|
|
16
10
|
"path": "ukr-detect/ukr-formality-dataset-translated-gyafc",
|
|
17
11
|
"revision": "671d1e6bbf45a74ef21af351fd4ef7b32b7856f8",
|
|
@@ -56,14 +50,7 @@ Tetreault, Joel},
|
|
|
56
50
|
class UkrFormalityClassificationV2(AbsTaskClassification):
|
|
57
51
|
metadata = TaskMetadata(
|
|
58
52
|
name="UkrFormalityClassification.v2",
|
|
59
|
-
description="""
|
|
60
|
-
This dataset contains Ukrainian Formality Classification dataset obtained by
|
|
61
|
-
trainslating English GYAFC data.
|
|
62
|
-
English data source: https://aclanthology.org/N18-1012/
|
|
63
|
-
Translation into Ukrainian language using model: https://huggingface.co/facebook/nllb-200-distilled-600M
|
|
64
|
-
Additionally, the dataset was balanced, with labels: 0 - informal, 1 - formal.
|
|
65
|
-
|
|
66
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
|
|
53
|
+
description="This dataset contains Ukrainian Formality Classification dataset obtained by trainslating English GYAFC data. English data source: https://aclanthology.org/N18-1012/ Translation into Ukrainian language using model: https://huggingface.co/facebook/nllb-200-distilled-600M Additionally, the dataset was balanced, with labels: 0 - informal, 1 - formal. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
|
|
67
54
|
dataset={
|
|
68
55
|
"path": "mteb/ukr_formality",
|
|
69
56
|
"revision": "e0b2dfa57d505f207deb571e58b0bd0b81180bd4",
|
|
@@ -40,8 +40,7 @@ class UrduRomanSentimentClassification(AbsTaskClassification):
|
|
|
40
40
|
class UrduRomanSentimentClassificationV2(AbsTaskClassification):
|
|
41
41
|
metadata = TaskMetadata(
|
|
42
42
|
name="UrduRomanSentimentClassification.v2",
|
|
43
|
-
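description="""The Roman Urdu dataset is a data corpus comprising of more than 20000 records tagged for sentiment (Positive, Negative, Neutral)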
|
|
44
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
|
|
43
|
+
description="The Roman Urdu dataset is a data corpus comprising of more than 20000 records tagged for sentiment (Positive, Negative, Neutral) This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
|
|
45
44
|
reference="https://archive.ics.uci.edu/dataset/458/roman+urdu+data+set",
|
|
46
45
|
dataset={
|
|
47
46
|
"path": "mteb/urdu_roman_sentiment",
|
|
@@ -11,12 +11,7 @@ class AmazonCounterfactualVNClassification(AbsTaskClassification):
|
|
|
11
11
|
"path": "GreenNode/amazon-counterfactual-vn",
|
|
12
12
|
"revision": "b48bc27d383cfca5b6a47135a52390fa5f66b253",
|
|
13
13
|
},
|
|
14
|
-
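description="""A collection of translated Amazon customer reviews annotated for counterfactual detection pair classification.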
|
|
15
|
-
The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
|
|
16
|
-
- The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
|
|
17
|
-
- Applies advanced embedding models to filter the translations.
|
|
18
|
-
- Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.
|
|
19
|
-
""",
|
|
14
|
+
description="A collection of translated Amazon customer reviews annotated for counterfactual detection pair classification. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
|
|
20
15
|
reference="https://arxiv.org/abs/2104.06893",
|
|
21
16
|
category="t2c",
|
|
22
17
|
type="Classification",
|
|
@@ -5,12 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
|
|
|
5
5
|
class AmazonPolarityVNClassification(AbsTaskClassification):
|
|
6
6
|
metadata = TaskMetadata(
|
|
7
7
|
name="AmazonPolarityVNClassification",
|
|
8
|
-
description="""A collection of translated Amazon customer reviews annotated for polarity classification.
|
|
9
|
-
The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
|
|
10
|
-
- The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
|
|
11
|
-
- Applies advanced embedding models to filter the translations.
|
|
12
|
-
- Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.
|
|
13
|
-
""",
|
|
8
|
+
description="A collection of translated Amazon customer reviews annotated for polarity classification. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
|
|
14
9
|
reference="https://huggingface.co/datasets/amazon_polarity",
|
|
15
10
|
dataset={
|
|
16
11
|
"path": "GreenNode/amazon-polarity-vn",
|
|
@@ -9,11 +9,7 @@ class AmazonReviewsVNClassification(AbsTaskClassification):
|
|
|
9
9
|
"path": "GreenNode/amazon-reviews-multi-vn",
|
|
10
10
|
"revision": "27da94deb6d4f44af789a3d70750fa506b79f189",
|
|
11
11
|
},
|
|
12
|
-
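description="""A collection of translated Amazon reviews specifically designed to aid research in multilingual text classification.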
|
|
13
|
-
The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
|
|
14
|
-
- The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
|
|
15
|
-
- Applies advanced embedding models to filter the translations.
|
|
16
|
-
- Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
|
|
12
|
+
description="A collection of translated Amazon reviews specifically designed to aid research in multilingual text classification. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
|
|
17
13
|
reference="https://arxiv.org/abs/2010.02573",
|
|
18
14
|
category="t2c",
|
|
19
15
|
type="Classification",
|
|
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
|
|
|
5
5
|
class Banking77VNClassification(AbsTaskClassification):
|
|
6
6
|
metadata = TaskMetadata(
|
|
7
7
|
name="Banking77VNClassification",
|
|
8
|
-
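description="""A translated dataset composed of online banking queries annotated with their corresponding intents.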
|
|
9
|
-
The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
|
|
10
|
-
- The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
|
|
11
|
-
- Applies advanced embedding models to filter the translations.
|
|
12
|
-
- Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
|
|
8
|
+
description="A translated dataset composed of online banking queries annotated with their corresponding intents. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
|
|
13
9
|
reference="https://arxiv.org/abs/2003.04807",
|
|
14
10
|
dataset={
|
|
15
11
|
"path": "GreenNode/banking77-vn",
|
|
@@ -7,11 +7,7 @@ class EmotionVNClassification(AbsTaskClassification):
|
|
|
7
7
|
|
|
8
8
|
metadata = TaskMetadata(
|
|
9
9
|
name="EmotionVNClassification",
|
|
10
|
-
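description="""Emotion is a translated dataset of Vietnamese from English Twitter messages with six basic emotions: anger, fear, joy, love, sadness, and surprise.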
|
|
11
|
-
The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
|
|
12
|
-
- The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
|
|
13
|
-
- Applies advanced embedding models to filter the translations.
|
|
14
|
-
- Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
|
|
10
|
+
description="Emotion is a translated dataset of Vietnamese from English Twitter messages with six basic emotions: anger, fear, joy, love, sadness, and surprise. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
|
|
15
11
|
reference="https://www.aclweb.org/anthology/D18-1404",
|
|
16
12
|
dataset={
|
|
17
13
|
"path": "GreenNode/emotion-vn",
|
|
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
|
|
|
5
5
|
class ImdbVNClassification(AbsTaskClassification):
|
|
6
6
|
metadata = TaskMetadata(
|
|
7
7
|
name="ImdbVNClassification",
|
|
8
|
-
description="""A translated dataset of large movie reviews annotated for sentiment classification.
|
|
9
|
-
The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
|
|
10
|
-
- The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
|
|
11
|
-
- Applies advanced embedding models to filter the translations.
|
|
12
|
-
- Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
|
|
8
|
+
description="A translated dataset of large movie reviews annotated for sentiment classification. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
|
|
13
9
|
dataset={
|
|
14
10
|
"path": "GreenNode/imdb-vn",
|
|
15
11
|
"revision": "0dccb383ee26c90c99d03c8674cf40de642f099a",
|
|
@@ -9,11 +9,7 @@ class MassiveIntentVNClassification(AbsTaskClassification):
|
|
|
9
9
|
"path": "GreenNode/amazon-massive-intent-vn",
|
|
10
10
|
"revision": "35c7ced69f958dbbaa24f792db4a9250e461866d",
|
|
11
11
|
},
|
|
12
|
-
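description="""A translated dataset from MASSIVE: A 1M-Example Multilingual Natural Language Understanding Dataset with 51 Typologically-Diverse Languages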
|
|
13
|
-
The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
|
|
14
|
-
- The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
|
|
15
|
-
- Applies advanced embedding models to filter the translations.
|
|
16
|
-
- Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
|
|
12
|
+
description="A translated dataset from MASSIVE: A 1M-Example Multilingual Natural Language Understanding Dataset with 51 Typologically-Diverse Languages The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
|
|
17
13
|
reference="https://arxiv.org/abs/2204.08582#:~:text=MASSIVE%20contains%201M%20realistic%2C%20parallel,diverse%20languages%20from%2029%20genera.",
|
|
18
14
|
category="t2c",
|
|
19
15
|
type="Classification",
|
|
@@ -9,11 +9,7 @@ class MassiveScenarioVNClassification(AbsTaskClassification):
|
|
|
9
9
|
"path": "GreenNode/amazon-massive-scenario-vn",
|
|
10
10
|
"revision": "a82e282d9f5aec1a8cf7d868ce40f70669c16b89",
|
|
11
11
|
},
|
|
12
|
-
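description="""A translated dataset from MASSIVE: A 1M-Example Multilingual Natural Language Understanding Dataset with 51 Typologically-Diverse Languages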
|
|
13
|
-
The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
|
|
14
|
-
- The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
|
|
15
|
-
- Applies advanced embedding models to filter the translations.
|
|
16
|
-
- Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
|
|
12
|
+
description="A translated dataset from MASSIVE: A 1M-Example Multilingual Natural Language Understanding Dataset with 51 Typologically-Diverse Languages The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
|
|
17
13
|
reference="https://arxiv.org/abs/2204.08582#:~:text=MASSIVE%20contains%201M%20realistic%2C%20parallel,diverse%20languages%20from%2029%20genera.",
|
|
18
14
|
category="t2c",
|
|
19
15
|
type="Classification",
|
|
@@ -9,11 +9,7 @@ class MTOPDomainVNClassification(AbsTaskClassification):
|
|
|
9
9
|
"path": "GreenNode/mtop-domain-vn",
|
|
10
10
|
"revision": "6e1ec8c54c018151c77472d94b1c0765230cf6ca",
|
|
11
11
|
},
|
|
12
|
-
description="""A translated dataset from MTOP: Multilingual Task-Oriented Semantic Parsing
|
|
13
|
-
The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
|
|
14
|
-
- The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
|
|
15
|
-
- Applies advanced embedding models to filter the translations.
|
|
16
|
-
- Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
|
|
12
|
+
description="A translated dataset from MTOP: Multilingual Task-Oriented Semantic Parsing The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
|
|
17
13
|
reference="https://arxiv.org/pdf/2008.09335.pdf",
|
|
18
14
|
category="t2c",
|
|
19
15
|
type="Classification",
|
|
@@ -9,11 +9,7 @@ class MTOPIntentVNClassification(AbsTaskClassification):
|
|
|
9
9
|
"path": "GreenNode/mtop-intent-vn",
|
|
10
10
|
"revision": "c4e81a5c9a813a0142d905e261e5a446cc6fbc4a",
|
|
11
11
|
},
|
|
12
|
-
description="""A translated dataset from MTOP: Multilingual Task-Oriented Semantic Parsing
|
|
13
|
-
The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
|
|
14
|
-
- The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
|
|
15
|
-
- Applies advanced embedding models to filter the translations.
|
|
16
|
-
- Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
|
|
12
|
+
description="A translated dataset from MTOP: Multilingual Task-Oriented Semantic Parsing The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
|
|
17
13
|
reference="https://arxiv.org/pdf/2008.09335.pdf",
|
|
18
14
|
category="t2c",
|
|
19
15
|
type="Classification",
|
|
@@ -7,11 +7,7 @@ class ToxicConversationsVNClassification(AbsTaskClassification):
|
|
|
7
7
|
|
|
8
8
|
metadata = TaskMetadata(
|
|
9
9
|
name="ToxicConversationsVNClassification",
|
|
10
|
-
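description="""A translated dataset from Collection of comments from the Civil Comments platform together with annotations if the comment is toxic or not.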
|
|
11
|
-
The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
|
|
12
|
-
- The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
|
|
13
|
-
- Applies advanced embedding models to filter the translations.
|
|
14
|
-
- Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
|
|
10
|
+
description="A translated dataset from Collection of comments from the Civil Comments platform together with annotations if the comment is toxic or not. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
|
|
15
11
|
reference="https://www.kaggle.com/competitions/jigsaw-unintended-bias-in-toxicity-classification/overview",
|
|
16
12
|
dataset={
|
|
17
13
|
"path": "GreenNode/toxic-conversations-50k-vn",
|
|
@@ -7,11 +7,7 @@ class TweetSentimentExtractionVNClassification(AbsTaskClassification):
|
|
|
7
7
|
|
|
8
8
|
metadata = TaskMetadata(
|
|
9
9
|
name="TweetSentimentExtractionVNClassification",
|
|
10
|
-
description="""A collection of translated tweets annotated for sentiment extraction.
|
|
11
|
-
The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
|
|
12
|
-
- The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
|
|
13
|
-
- Applies advanced embedding models to filter the translations.
|
|
14
|
-
- Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
|
|
10
|
+
description="A collection of translated tweets annotated for sentiment extraction. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
|
|
15
11
|
reference="https://www.kaggle.com/competitions/tweet-sentiment-extraction/overview",
|
|
16
12
|
dataset={
|
|
17
13
|
"path": "GreenNode/tweet-sentiment-extraction-vn",
|
|
@@ -45,8 +45,7 @@ class VieStudentFeedbackClassification(AbsTaskClassification):
|
|
|
45
45
|
class VieStudentFeedbackClassificationV2(AbsTaskClassification):
|
|
46
46
|
metadata = TaskMetadata(
|
|
47
47
|
name="VieStudentFeedbackClassification.v2",
|
|
48
|
-
description="""A Vietnamese dataset for classification of student feedback
|
|
49
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
|
|
48
|
+
description="A Vietnamese dataset for classification of student feedback This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
|
|
50
49
|
reference="https://ieeexplore.ieee.org/document/8573337",
|
|
51
50
|
dataset={
|
|
52
51
|
"path": "mteb/vie_student_feedback",
|
|
@@ -79,8 +79,7 @@ Lan, Zhenzhong },
|
|
|
79
79
|
class TNewsV2(AbsTaskClassification):
|
|
80
80
|
metadata = TaskMetadata(
|
|
81
81
|
name="TNews.v2",
|
|
82
|
-
description="""Short Text Classification for News
|
|
83
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
|
|
82
|
+
description="Short Text Classification for News This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
|
|
84
83
|
reference="https://www.cluebenchmarks.com/introduce.html",
|
|
85
84
|
dataset={
|
|
86
85
|
"path": "mteb/t_news",
|
|
@@ -229,8 +228,7 @@ Lan, Zhenzhong },
|
|
|
229
228
|
class IFlyTekV2(AbsTaskClassification):
|
|
230
229
|
metadata = TaskMetadata(
|
|
231
230
|
name="IFlyTek.v2",
|
|
232
|
-
description="""Long Text classification for the description of Apps
|
|
233
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
|
|
231
|
+
description="Long Text classification for the description of Apps This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
|
|
234
232
|
reference="https://www.cluebenchmarks.com/introduce.html",
|
|
235
233
|
dataset={
|
|
236
234
|
"path": "mteb/i_fly_tek",
|
|
@@ -335,8 +333,7 @@ class MultilingualSentiment(AbsTaskClassification):
|
|
|
335
333
|
class MultilingualSentimentV2(AbsTaskClassification):
|
|
336
334
|
metadata = TaskMetadata(
|
|
337
335
|
name="MultilingualSentiment.v2",
|
|
338
|
-
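description="""A collection of multilingual sentiments datasets grouped into 3 classes -- positive, neutral, negative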
|
|
339
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
|
|
336
|
+
description="A collection of multilingual sentiments datasets grouped into 3 classes -- positive, neutral, negative This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
|
|
340
337
|
reference="https://github.com/tyqiangz/multilingual-sentiment-datasets",
|
|
341
338
|
dataset={
|
|
342
339
|
"path": "mteb/multilingual_sentiment",
|
|
@@ -403,8 +400,7 @@ class JDReview(AbsTaskClassification):
|
|
|
403
400
|
class JDReviewV2(AbsTaskClassification):
|
|
404
401
|
metadata = TaskMetadata(
|
|
405
402
|
name="JDReview.v2",
|
|
406
|
-
description="""review for iphone
|
|
407
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
|
|
403
|
+
description="review for iphone This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
|
|
408
404
|
reference="https://aclanthology.org/2023.nodalida-1.20/",
|
|
409
405
|
dataset={
|
|
410
406
|
"path": "mteb/jd_review",
|
|
@@ -514,8 +510,7 @@ class Waimai(AbsTaskClassification):
|
|
|
514
510
|
class WaimaiV2(AbsTaskClassification):
|
|
515
511
|
metadata = TaskMetadata(
|
|
516
512
|
name="Waimai.v2",
|
|
517
|
-
description="""Sentiment Analysis of user reviews on takeaway platforms
|
|
518
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
|
|
513
|
+
description="Sentiment Analysis of user reviews on takeaway platforms This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
|
|
519
514
|
reference="https://aclanthology.org/2023.nodalida-1.20/",
|
|
520
515
|
dataset={
|
|
521
516
|
"path": "mteb/waimai",
|
|
@@ -48,8 +48,7 @@ class YueOpenriceReviewClassification(AbsTaskClassification):
|
|
|
48
48
|
class YueOpenriceReviewClassificationV2(AbsTaskClassification):
|
|
49
49
|
metadata = TaskMetadata(
|
|
50
50
|
name="YueOpenriceReviewClassification.v2",
|
|
51
|
-
description="""A Cantonese dataset for review classification
|
|
52
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
|
|
51
|
+
description="A Cantonese dataset for review classification This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
|
|
53
52
|
reference="https://github.com/Christainx/Dataset_Cantonese_Openrice",
|
|
54
53
|
dataset={
|
|
55
54
|
"path": "mteb/yue_openrice_review",
|
|
@@ -45,8 +45,7 @@ class IsiZuluNewsClassification(AbsTaskClassification):
|
|
|
45
45
|
class IsiZuluNewsClassificationV2(AbsTaskClassification):
|
|
46
46
|
metadata = TaskMetadata(
|
|
47
47
|
name="IsiZuluNewsClassification.v2",
|
|
48
|
-
description="""isiZulu News Classification Dataset
|
|
49
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
|
|
48
|
+
description="isiZulu News Classification Dataset This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
|
|
50
49
|
reference="https://huggingface.co/datasets/dsfsi/za-isizulu-siswati-news",
|
|
51
50
|
dataset={
|
|
52
51
|
"path": "mteb/isi_zulu_news",
|
|
@@ -25,7 +25,7 @@ class HUMEWikiCitiesClustering(AbsTaskClusteringLegacy):
|
|
|
25
25
|
dialect=[],
|
|
26
26
|
sample_creation="found",
|
|
27
27
|
bibtex_citation=r"""
|
|
28
|
-
@online{
|
|
28
|
+
@online{wikidump2024,
|
|
29
29
|
author = {Wikimedia Foundation},
|
|
30
30
|
title = {Wikimedia Downloads},
|
|
31
31
|
url = {https://dumps.wikimedia.org},
|
|
@@ -25,7 +25,7 @@ class WikiCitiesClustering(AbsTaskClusteringLegacy):
|
|
|
25
25
|
dialect=[],
|
|
26
26
|
sample_creation="found",
|
|
27
27
|
bibtex_citation=r"""
|
|
28
|
-
@online{
|
|
28
|
+
@online{wikidump2024,
|
|
29
29
|
author = {Wikimedia Foundation},
|
|
30
30
|
title = {Wikimedia Downloads},
|
|
31
31
|
url = {https://dumps.wikimedia.org},
|
|
@@ -8,9 +8,7 @@ class MewsC16JaClustering(AbsTaskClustering):
|
|
|
8
8
|
|
|
9
9
|
metadata = TaskMetadata(
|
|
10
10
|
name="MewsC16JaClustering",
|
|
11
|
-
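description="""MewsC-16 (Multilingual Short Text Clustering Dataset for News in 16 languages) is constructed from Wikinews.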
|
|
12
|
-
This dataset is the Japanese split of MewsC-16, containing topic sentences from Wikinews articles in 12 categories.
|
|
13
|
-
More detailed information is available in the Appendix E of the citation.""",
|
|
11
|
+
description="MewsC-16 (Multilingual Short Text Clustering Dataset for News in 16 languages) is constructed from Wikinews. This dataset is the Japanese split of MewsC-16, containing topic sentences from Wikinews articles in 12 categories. More detailed information is available in the Appendix E of the citation.",
|
|
14
12
|
reference="https://github.com/sbintuitions/JMTEB",
|
|
15
13
|
dataset={
|
|
16
14
|
"path": "mteb/MewsC16JaClustering",
|
|
@@ -210,12 +210,7 @@ class SIB200ClusteringFast(AbsTaskClustering):
|
|
|
210
210
|
|
|
211
211
|
metadata = TaskMetadata(
|
|
212
212
|
name="SIB200ClusteringS2S",
|
|
213
|
-
description="""SIB-200 is the largest publicly available topic classification
|
|
214
|
-
dataset based on Flores-200 covering 205 languages and dialects annotated. The dataset is
|
|
215
|
-
annotated in English for the topics, science/technology, travel, politics, sports,
|
|
216
|
-
health, entertainment, and geography. The labels are then transferred to the other languages
|
|
217
|
-
in Flores-200 which are human-translated.
|
|
218
|
-
""",
|
|
213
|
+
description="SIB-200 is the largest publicly available topic classification dataset based on Flores-200 covering 205 languages and dialects annotated. The dataset is annotated in English for the topics, science/technology, travel, politics, sports, health, entertainment, and geography. The labels are then transferred to the other languages in Flores-200 which are human-translated.",
|
|
219
214
|
reference="https://arxiv.org/abs/2309.07445",
|
|
220
215
|
dataset={
|
|
221
216
|
"path": "mteb/sib200",
|
|
@@ -28,6 +28,9 @@ class DutchNewsArticlesClusteringP2P(AbsTaskClustering):
|
|
|
28
28
|
dialect=[],
|
|
29
29
|
sample_creation="found",
|
|
30
30
|
bibtex_citation="",
|
|
31
|
+
prompt={
|
|
32
|
+
"query": "Identificeer de hoofdcategorie van nieuwsartikelen op basis van de titels en de inhoud"
|
|
33
|
+
},
|
|
31
34
|
)
|
|
32
35
|
|
|
33
36
|
def dataset_transform(self):
|
|
@@ -28,6 +28,9 @@ class DutchNewsArticlesClusteringS2S(AbsTaskClustering):
|
|
|
28
28
|
dialect=[],
|
|
29
29
|
sample_creation="found",
|
|
30
30
|
bibtex_citation="",
|
|
31
|
+
prompt={
|
|
32
|
+
"query": "Identificeer de hoofdcategorie van nieuwsartikelen op basis van de titels"
|
|
33
|
+
},
|
|
31
34
|
)
|
|
32
35
|
|
|
33
36
|
def dataset_transform(self):
|
|
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
|
|
|
5
5
|
class RedditClusteringP2PVN(AbsTaskClusteringLegacy):
|
|
6
6
|
metadata = TaskMetadata(
|
|
7
7
|
name="RedditClusteringP2P-VN",
|
|
8
|
-
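description="""A translated dataset from Clustering of title+posts from reddit. Clustering of 10 sets of 50k paragraphs and 40 sets of 10k paragraphs.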
|
|
9
|
-
The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
|
|
10
|
-
- The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
|
|
11
|
-
- Applies advanced embedding models to filter the translations.
|
|
12
|
-
- Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
|
|
8
|
+
description="A translated dataset from Clustering of title+posts from reddit. Clustering of 10 sets of 50k paragraphs and 40 sets of 10k paragraphs. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
|
|
13
9
|
reference="https://arxiv.org/abs/2104.07081",
|
|
14
10
|
dataset={
|
|
15
11
|
"path": "GreenNode/reddit-clustering-p2p-vn",
|
|
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
|
|
|
5
5
|
class RedditClusteringVN(AbsTaskClusteringLegacy):
|
|
6
6
|
metadata = TaskMetadata(
|
|
7
7
|
name="RedditClustering-VN",
|
|
8
|
-
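description="""A translated dataset from Clustering of titles from 199 subreddits. Clustering of 25 sets, each with 10-50 classes, and each class with 100 - 1000 sentences.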
|
|
9
|
-
The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
|
|
10
|
-
- The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
|
|
11
|
-
- Applies advanced embedding models to filter the translations.
|
|
12
|
-
- Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
|
|
8
|
+
description="A translated dataset from Clustering of titles from 199 subreddits. Clustering of 25 sets, each with 10-50 classes, and each class with 100 - 1000 sentences. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
|
|
13
9
|
reference="https://arxiv.org/abs/2104.07081",
|
|
14
10
|
dataset={
|
|
15
11
|
"path": "GreenNode/reddit-clustering-vn",
|
|
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
|
|
|
5
5
|
class StackExchangeClusteringP2PVN(AbsTaskClusteringLegacy):
|
|
6
6
|
metadata = TaskMetadata(
|
|
7
7
|
name="StackExchangeClusteringP2P-VN",
|
|
8
|
-
description="
|
|
9
|
-
The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
|
|
10
|
-
- The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
|
|
11
|
-
- Applies advanced embedding models to filter the translations.
|
|
12
|
-
- Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
|
|
8
|
+
description="A translated Clustering of title+body from stackexchange. Clustering of 5 sets of 10k paragraphs and 5 sets of 5k paragraphs. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
|
|
13
9
|
reference="https://arxiv.org/abs/2104.07081",
|
|
14
10
|
dataset={
|
|
15
11
|
"path": "GreenNode/stackexchange-clustering-p2p-vn",
|
|
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
|
|
|
5
5
|
class StackExchangeClusteringVN(AbsTaskClusteringLegacy):
|
|
6
6
|
metadata = TaskMetadata(
|
|
7
7
|
name="StackExchangeClustering-VN",
|
|
8
|
-
description="
|
|
9
|
-
The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
|
|
10
|
-
- The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
|
|
11
|
-
- Applies advanced embedding models to filter the translations.
|
|
12
|
-
- Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
|
|
8
|
+
description="A translated dataset from Clustering of titles from 121 stackexchanges. Clustering of 25 sets, each with 10-50 classes, and each class with 100 - 1000 sentences. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
|
|
13
9
|
reference="https://arxiv.org/abs/2104.07081",
|
|
14
10
|
dataset={
|
|
15
11
|
"path": "GreenNode/stackexchange-clustering-vn",
|
|
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
|
|
|
5
5
|
class TwentyNewsgroupsClusteringVN(AbsTaskClusteringLegacy):
|
|
6
6
|
metadata = TaskMetadata(
|
|
7
7
|
name="TwentyNewsgroupsClustering-VN",
|
|
8
|
-
description="
|
|
9
|
-
The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
|
|
10
|
-
- The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
|
|
11
|
-
- Applies advanced embedding models to filter the translations.
|
|
12
|
-
- Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
|
|
8
|
+
description="A translated dataset from Clustering of the 20 Newsgroups dataset (subject only). The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
|
|
13
9
|
reference="https://scikit-learn.org/0.19/datasets/twenty_newsgroups.html",
|
|
14
10
|
dataset={
|
|
15
11
|
"path": "GreenNode/twentynewsgroups-clustering-vn",
|
|
@@ -226,7 +226,7 @@ class ThuNewsClusteringFastS2S(AbsTaskClustering):
|
|
|
226
226
|
dialect=[],
|
|
227
227
|
sample_creation="found",
|
|
228
228
|
bibtex_citation=r"""
|
|
229
|
-
@software{
|
|
229
|
+
@software{sun2016thuctc,
|
|
230
230
|
author = {Sun, M. and Li, J. and Guo, Z. and Yu, Z. and Zheng, Y. and Si, X. and Liu, Z.},
|
|
231
231
|
note = {THU Chinese Text Classification Toolkit},
|
|
232
232
|
publisher = {THU Natural Language Processing Lab},
|
|
@@ -285,7 +285,7 @@ class ThuNewsClusteringFastP2P(AbsTaskClustering):
|
|
|
285
285
|
dialect=[],
|
|
286
286
|
sample_creation="found",
|
|
287
287
|
bibtex_citation=r"""
|
|
288
|
-
@software{
|
|
288
|
+
@software{sun2016thuctc,
|
|
289
289
|
author = {Sun, M. and Li, J. and Guo, Z. and Yu, Z. and Zheng, Y. and Si, X. and Liu, Z.},
|
|
290
290
|
note = {THU Chinese Text Classification Toolkit},
|
|
291
291
|
publisher = {THU Natural Language Processing Lab},
|
|
@@ -49,7 +49,7 @@ class SugarCrepe(AbsTaskImageTextPairClassification):
|
|
|
49
49
|
"""Load dataset from HuggingFace hub"""
|
|
50
50
|
if self.data_loaded:
|
|
51
51
|
return
|
|
52
|
-
self.dataset = datasets.load_dataset(**self.metadata.dataset)
|
|
52
|
+
self.dataset = datasets.load_dataset(**self.metadata.dataset)
|
|
53
53
|
self.dataset = datasets.DatasetDict({"test": self.dataset["train"]})
|
|
54
54
|
self.dataset_transform()
|
|
55
55
|
self.data_loaded = True
|
|
@@ -7,11 +7,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
|
|
|
7
7
|
class EmitClassification(AbsTaskMultilabelClassification):
|
|
8
8
|
metadata = TaskMetadata(
|
|
9
9
|
name="EmitClassification",
|
|
10
|
-
description="
|
|
11
|
-
The EMit dataset consists of social media messages about TV shows, TV series, music videos, and advertisements.
|
|
12
|
-
Each message is annotated with one or more of the 8 primary emotions defined by Plutchik
|
|
13
|
-
(anger, anticipation, disgust, fear, joy, sadness, surprise, trust), as well as an additional label “love.”
|
|
14
|
-
""",
|
|
10
|
+
description="The EMit dataset is a comprehensive resource for the detection of emotions in Italian social media texts. The EMit dataset consists of social media messages about TV shows, TV series, music videos, and advertisements. Each message is annotated with one or more of the 8 primary emotions defined by Plutchik (anger, anticipation, disgust, fear, joy, sadness, surprise, trust), as well as an additional label “love.”",
|
|
15
11
|
reference="https://github.com/oaraque/emit",
|
|
16
12
|
dataset={
|
|
17
13
|
"path": "MattiaSangermano/emit",
|
|
@@ -7,15 +7,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
|
|
|
7
7
|
class KorHateSpeechMLClassification(AbsTaskMultilabelClassification):
|
|
8
8
|
metadata = TaskMetadata(
|
|
9
9
|
name="KorHateSpeechMLClassification",
|
|
10
|
-
description=""
|
|
11
|
-
The Korean Multi-label Hate Speech Dataset, K-MHaS, consists of 109,692 utterances from Korean online news comments,
|
|
12
|
-
labelled with 8 fine-grained hate speech classes (labels: Politics, Origin, Physical, Age, Gender, Religion, Race, Profanity)
|
|
13
|
-
or Not Hate Speech class. Each utterance provides from a single to four labels that can handles Korean language patterns effectively.
|
|
14
|
-
For more details, please refer to the paper about K-MHaS, published at COLING 2022.
|
|
15
|
-
This dataset is based on the Korean online news comments available on Kaggle and Github.
|
|
16
|
-
The unlabeled raw data was collected between January 2018 and June 2020.
|
|
17
|
-
The language producers are users who left the comments on the Korean online news platform between 2018 and 2020.
|
|
18
|
-
""",
|
|
10
|
+
description="The Korean Multi-label Hate Speech Dataset, K-MHaS, consists of 109,692 utterances from Korean online news comments, labelled with 8 fine-grained hate speech classes (labels: Politics, Origin, Physical, Age, Gender, Religion, Race, Profanity) or Not Hate Speech class. Each utterance provides from a single to four labels that can handles Korean language patterns effectively. For more details, please refer to the paper about K-MHaS, published at COLING 2022. This dataset is based on the Korean online news comments available on Kaggle and Github. The unlabeled raw data was collected between January 2018 and June 2020. The language producers are users who left the comments on the Korean online news platform between 2018 and 2020.",
|
|
19
11
|
dataset={
|
|
20
12
|
"path": "mteb/KorHateSpeechMLClassification",
|
|
21
13
|
"revision": "47cd2e61b64f2f11ccb006a579cda71318c6de9b",
|
|
@@ -7,12 +7,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
|
|
|
7
7
|
class MalteseNewsClassification(AbsTaskMultilabelClassification):
|
|
8
8
|
metadata = TaskMetadata(
|
|
9
9
|
name="MalteseNewsClassification",
|
|
10
|
-
description="
|
|
11
|
-
Articles. The data was collected from the press_mt subset from Korpus
|
|
12
|
-
Malti v4.0. Article contents were cleaned to filter out JavaScript, CSS,
|
|
13
|
-
& repeated non-Maltese sub-headings. The labels are based on the category
|
|
14
|
-
field from this corpus.
|
|
15
|
-
""",
|
|
10
|
+
description="A multi-label topic classification dataset for Maltese News Articles. The data was collected from the press_mt subset from Korpus Malti v4.0. Article contents were cleaned to filter out JavaScript, CSS, & repeated non-Maltese sub-headings. The labels are based on the category field from this corpus.",
|
|
16
11
|
reference="https://huggingface.co/datasets/MLRS/maltese_news_categories",
|
|
17
12
|
dataset={
|
|
18
13
|
"path": "MLRS/maltese_news_categories",
|