mteb 2.1.4-py3-none-any.whl → 2.7.2-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- mteb/__init__.py +6 -0
- mteb/_create_dataloaders.py +22 -20
- mteb/_evaluators/any_sts_evaluator.py +23 -14
- mteb/_evaluators/classification_metrics.py +54 -0
- mteb/_evaluators/clustering_evaluator.py +3 -3
- mteb/_evaluators/evaluator.py +4 -2
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +18 -11
- mteb/_evaluators/pair_classification_evaluator.py +34 -40
- mteb/_evaluators/retrieval_evaluator.py +2 -2
- mteb/_evaluators/retrieval_metrics.py +18 -17
- mteb/_evaluators/sklearn_evaluator.py +25 -37
- mteb/_evaluators/text/bitext_mining_evaluator.py +31 -19
- mteb/_evaluators/text/summarization_evaluator.py +27 -20
- mteb/_evaluators/zeroshot_classification_evaluator.py +7 -5
- mteb/abstasks/_data_filter/__init__.py +0 -0
- mteb/abstasks/_data_filter/filters.py +125 -0
- mteb/abstasks/_data_filter/task_pipelines.py +105 -0
- mteb/abstasks/_statistics_calculation.py +23 -11
- mteb/abstasks/_stratification.py +18 -18
- mteb/abstasks/abstask.py +35 -28
- mteb/abstasks/aggregate_task_metadata.py +1 -9
- mteb/abstasks/aggregated_task.py +10 -29
- mteb/abstasks/classification.py +15 -12
- mteb/abstasks/clustering.py +20 -16
- mteb/abstasks/clustering_legacy.py +13 -10
- mteb/abstasks/image/image_text_pair_classification.py +7 -4
- mteb/abstasks/multilabel_classification.py +33 -22
- mteb/abstasks/pair_classification.py +27 -11
- mteb/abstasks/regression.py +4 -4
- mteb/abstasks/retrieval.py +28 -24
- mteb/abstasks/retrieval_dataset_loaders.py +2 -2
- mteb/abstasks/sts.py +14 -4
- mteb/abstasks/task_metadata.py +32 -33
- mteb/abstasks/text/bitext_mining.py +39 -28
- mteb/abstasks/text/reranking.py +8 -6
- mteb/abstasks/text/summarization.py +10 -5
- mteb/abstasks/zeroshot_classification.py +8 -4
- mteb/benchmarks/_create_table.py +84 -37
- mteb/benchmarks/benchmark.py +77 -16
- mteb/benchmarks/benchmarks/__init__.py +12 -0
- mteb/benchmarks/benchmarks/benchmarks.py +361 -16
- mteb/benchmarks/get_benchmark.py +14 -53
- mteb/cache.py +227 -37
- mteb/cli/_display_tasks.py +2 -2
- mteb/cli/build_cli.py +110 -14
- mteb/cli/generate_model_card.py +43 -23
- mteb/deprecated_evaluator.py +71 -62
- mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
- mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
- mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
- mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
- mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
- mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
- mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
- mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
- mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
- mteb/evaluate.py +106 -75
- mteb/filter_tasks.py +25 -26
- mteb/get_tasks.py +29 -30
- mteb/languages/language_scripts.py +5 -3
- mteb/leaderboard/app.py +414 -151
- mteb/leaderboard/benchmark_selector.py +14 -5
- mteb/leaderboard/figures.py +13 -15
- mteb/leaderboard/table.py +82 -17
- mteb/load_results.py +12 -12
- mteb/models/__init__.py +4 -1
- mteb/models/abs_encoder.py +31 -23
- mteb/models/cache_wrappers/__init__.py +2 -1
- mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +7 -6
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
- mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
- mteb/models/cache_wrappers/cache_wrapper.py +3 -3
- mteb/models/get_model_meta.py +25 -118
- mteb/models/instruct_wrapper.py +33 -9
- mteb/models/model_implementations/align_models.py +8 -1
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +65 -0
- mteb/models/model_implementations/ara_models.py +9 -1
- mteb/models/model_implementations/arctic_models.py +16 -8
- mteb/models/model_implementations/b1ade_models.py +2 -1
- mteb/models/model_implementations/bedrock_models.py +4 -0
- mteb/models/model_implementations/bge_models.py +101 -17
- mteb/models/model_implementations/bica_model.py +35 -0
- mteb/models/model_implementations/blip2_models.py +13 -2
- mteb/models/model_implementations/blip_models.py +43 -16
- mteb/models/model_implementations/bm25.py +5 -4
- mteb/models/model_implementations/bmretriever_models.py +10 -4
- mteb/models/model_implementations/cadet_models.py +10 -1
- mteb/models/model_implementations/cde_models.py +25 -4
- mteb/models/model_implementations/clip_models.py +9 -6
- mteb/models/model_implementations/clips_models.py +100 -0
- mteb/models/model_implementations/codefuse_models.py +165 -3
- mteb/models/model_implementations/codesage_models.py +18 -3
- mteb/models/model_implementations/cohere_models.py +13 -6
- mteb/models/model_implementations/cohere_v.py +7 -2
- mteb/models/model_implementations/colpali_models.py +17 -9
- mteb/models/model_implementations/colqwen_models.py +275 -5
- mteb/models/model_implementations/colsmol_models.py +4 -2
- mteb/models/model_implementations/conan_models.py +2 -1
- mteb/models/model_implementations/dino_models.py +194 -23
- mteb/models/model_implementations/e5_instruct.py +27 -4
- mteb/models/model_implementations/e5_models.py +21 -110
- mteb/models/model_implementations/e5_v.py +7 -6
- mteb/models/model_implementations/eagerworks_models.py +164 -0
- mteb/models/model_implementations/emillykkejensen_models.py +91 -0
- mteb/models/model_implementations/en_code_retriever.py +2 -1
- mteb/models/model_implementations/euler_models.py +32 -0
- mteb/models/model_implementations/evaclip_models.py +4 -0
- mteb/models/model_implementations/fa_models.py +67 -9
- mteb/models/model_implementations/facebookai.py +205 -0
- mteb/models/model_implementations/geogpt_models.py +2 -1
- mteb/models/model_implementations/gme_v_models.py +17 -10
- mteb/models/model_implementations/google_models.py +17 -6
- mteb/models/model_implementations/granite_vision_embedding_models.py +8 -3
- mteb/models/model_implementations/gritlm_models.py +4 -2
- mteb/models/model_implementations/gte_models.py +99 -9
- mteb/models/model_implementations/hinvec_models.py +2 -1
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +36 -6
- mteb/models/model_implementations/inf_models.py +4 -2
- mteb/models/model_implementations/jasper_models.py +256 -3
- mteb/models/model_implementations/jina_clip.py +49 -10
- mteb/models/model_implementations/jina_models.py +222 -11
- mteb/models/model_implementations/kalm_models.py +203 -25
- mteb/models/model_implementations/kblab.py +37 -0
- mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
- mteb/models/model_implementations/kfst.py +25 -0
- mteb/models/model_implementations/kowshik24_models.py +32 -0
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +2 -1
- mteb/models/model_implementations/linq_models.py +4 -3
- mteb/models/model_implementations/listconranker.py +2 -2
- mteb/models/model_implementations/llm2clip_models.py +9 -6
- mteb/models/model_implementations/llm2vec_models.py +16 -8
- mteb/models/model_implementations/mcinext_models.py +7 -1
- mteb/models/model_implementations/mdbr_models.py +19 -3
- mteb/models/model_implementations/misc_models.py +422 -60
- mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
- mteb/models/model_implementations/mme5_models.py +2 -1
- mteb/models/model_implementations/moco_models.py +15 -4
- mteb/models/model_implementations/mod_models.py +191 -0
- mteb/models/model_implementations/model2vec_models.py +27 -14
- mteb/models/model_implementations/moka_models.py +4 -1
- mteb/models/model_implementations/nbailab.py +70 -0
- mteb/models/model_implementations/no_instruct_sentence_models.py +3 -2
- mteb/models/model_implementations/nomic_models.py +173 -6
- mteb/models/model_implementations/nomic_models_vision.py +8 -3
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +32 -19
- mteb/models/model_implementations/nvidia_models.py +155 -20
- mteb/models/model_implementations/octen_models.py +254 -0
- mteb/models/model_implementations/openai_models.py +20 -16
- mteb/models/model_implementations/openclip_models.py +37 -13
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -5
- mteb/models/model_implementations/ops_moa_models.py +5 -3
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
- mteb/models/model_implementations/pawan_models.py +39 -0
- mteb/models/model_implementations/piccolo_models.py +9 -1
- mteb/models/model_implementations/pixie_models.py +56 -0
- mteb/models/model_implementations/promptriever_models.py +12 -8
- mteb/models/model_implementations/pylate_models.py +46 -12
- mteb/models/model_implementations/qodo_models.py +4 -2
- mteb/models/model_implementations/qtack_models.py +2 -1
- mteb/models/model_implementations/qwen3_models.py +9 -6
- mteb/models/model_implementations/qzhou_models.py +5 -3
- mteb/models/model_implementations/random_baseline.py +19 -24
- mteb/models/model_implementations/rasgaard_models.py +34 -0
- mteb/models/model_implementations/reasonir_model.py +2 -1
- mteb/models/model_implementations/repllama_models.py +5 -3
- mteb/models/model_implementations/rerankers_custom.py +15 -9
- mteb/models/model_implementations/rerankers_monot5_based.py +31 -31
- mteb/models/model_implementations/richinfoai_models.py +2 -1
- mteb/models/model_implementations/ru_sentence_models.py +71 -20
- mteb/models/model_implementations/ruri_models.py +322 -0
- mteb/models/model_implementations/salesforce_models.py +6 -3
- mteb/models/model_implementations/samilpwc_models.py +2 -1
- mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
- mteb/models/model_implementations/searchmap_models.py +2 -1
- mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +625 -0
- mteb/models/model_implementations/seed_models.py +1 -0
- mteb/models/model_implementations/sentence_transformers_models.py +177 -18
- mteb/models/model_implementations/shuu_model.py +32 -31
- mteb/models/model_implementations/siglip_models.py +30 -20
- mteb/models/model_implementations/slm_models.py +416 -0
- mteb/models/model_implementations/sonar_models.py +1 -0
- mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
- mteb/models/model_implementations/stella_models.py +23 -4
- mteb/models/model_implementations/tarka_models.py +376 -0
- mteb/models/model_implementations/text2vec_models.py +9 -3
- mteb/models/model_implementations/ua_sentence_models.py +11 -1
- mteb/models/model_implementations/uae_models.py +8 -1
- mteb/models/model_implementations/vdr_models.py +3 -1
- mteb/models/model_implementations/vi_vn_models.py +45 -6
- mteb/models/model_implementations/vista_models.py +2 -0
- mteb/models/model_implementations/vlm2vec_models.py +5 -3
- mteb/models/model_implementations/voyage_models.py +99 -0
- mteb/models/model_implementations/voyage_v.py +17 -9
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +2 -1
- mteb/models/model_implementations/yuan_models.py +34 -0
- mteb/models/model_implementations/yuan_models_en.py +58 -0
- mteb/models/model_meta.py +498 -29
- mteb/models/models_protocols.py +22 -6
- mteb/models/search_encoder_index/__init__.py +7 -0
- mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
- mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +160 -0
- mteb/models/search_wrappers.py +197 -65
- mteb/models/sentence_transformer_wrapper.py +52 -32
- mteb/models/vllm_wrapper.py +327 -0
- mteb/py.typed +0 -0
- mteb/results/benchmark_results.py +114 -65
- mteb/results/model_result.py +63 -26
- mteb/results/task_result.py +117 -77
- mteb/similarity_functions.py +60 -7
- mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
- mteb/tasks/classification/ara/ajgt.py +1 -2
- mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
- mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
- mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
- mteb/tasks/classification/dan/dk_hate_classification.py +2 -3
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
- mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
- mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
- mteb/tasks/classification/eng/arxiv_classification.py +1 -2
- mteb/tasks/classification/eng/banking77_classification.py +1 -2
- mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
- mteb/tasks/classification/eng/emotion_classification.py +1 -2
- mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
- mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
- mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
- mteb/tasks/classification/eng/imdb_classification.py +1 -2
- mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
- mteb/tasks/classification/eng/news_classification.py +1 -2
- mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
- mteb/tasks/classification/eng/patent_classification.py +1 -2
- mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
- mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
- mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
- mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
- mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
- mteb/tasks/classification/eng/ucf101_classification.py +1 -5
- mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
- mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
- mteb/tasks/classification/est/estonian_valence.py +2 -3
- mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
- mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
- mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
- mteb/tasks/classification/fra/french_book_reviews.py +1 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
- mteb/tasks/classification/heb/__init__.py +6 -1
- mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
- mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
- mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
- mteb/tasks/classification/jpn/wrime_classification.py +1 -2
- mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
- mteb/tasks/classification/kor/klue_tc.py +1 -2
- mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +3 -4
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
- mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
- mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
- mteb/tasks/classification/multilingual/scala_classification.py +2 -3
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
- mteb/tasks/classification/mya/myanmar_news.py +1 -2
- mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
- mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
- mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
- mteb/tasks/classification/nld/iconclass_classification.py +3 -0
- mteb/tasks/classification/nld/open_tender_classification.py +3 -0
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
- mteb/tasks/classification/nob/no_rec_classification.py +1 -2
- mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
- mteb/tasks/classification/ory/odia_news_classification.py +1 -2
- mteb/tasks/classification/pol/polish_classification.py +3 -6
- mteb/tasks/classification/ron/moroco.py +1 -2
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
- mteb/tasks/classification/rus/georeview_classification.py +1 -2
- mteb/tasks/classification/rus/headline_classification.py +1 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
- mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
- mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
- mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
- mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
- mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
- mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
- mteb/tasks/classification/swe/dalaj_classification.py +1 -2
- mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
- mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
- mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
- mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
- mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
- mteb/tasks/classification/tur/__init__.py +4 -0
- mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
- mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
- mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
- mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
- mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
- mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
- mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
- mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
- mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
- mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
- mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
- mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
- mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
- mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
- mteb/tasks/classification/zho/cmteb_classification.py +5 -10
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
- mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
- mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
- mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
- mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
- mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
- mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
- mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
- mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
- mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
- mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
- mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
- mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
- mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
- mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
- mteb/tasks/pair_classification/rus/__init__.py +2 -2
- mteb/tasks/pair_classification/rus/terra.py +51 -25
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
- mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
- mteb/tasks/reranking/jpn/__init__.py +9 -1
- mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
- mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
- mteb/tasks/reranking/multilingual/__init__.py +2 -0
- mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
- mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
- mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
- mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
- mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
- mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
- mteb/tasks/retrieval/code/code_rag.py +12 -12
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
- mteb/tasks/retrieval/eng/__init__.py +2 -0
- mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
- mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
- mteb/tasks/retrieval/jpn/__init__.py +8 -0
- mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
- mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
- mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
- mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
- mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
- mteb/tasks/retrieval/kor/__init__.py +16 -1
- mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
- mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
- mteb/tasks/retrieval/multilingual/__init__.py +24 -0
- mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
- mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
- mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
- mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
- mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +389 -0
- mteb/tasks/retrieval/nld/__init__.py +8 -4
- mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
- mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
- mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
- mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
- mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
- mteb/tasks/retrieval/nob/norquad.py +2 -2
- mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
- mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
- mteb/tasks/retrieval/vie/__init__.py +14 -6
- mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +40 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +40 -5
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +40 -7
- mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +40 -6
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +49 -5
- mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +40 -5
- mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
- mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
- mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
- mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
- mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
- mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
- mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
- mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
- mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
- mteb/types/__init__.py +2 -0
- mteb/types/_encoder_io.py +19 -2
- mteb/types/_result.py +2 -1
- mteb/types/statistics.py +9 -3
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/METADATA +25 -8
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/RECORD +525 -438
- mteb/models/model_implementations/mxbai_models.py +0 -102
- mteb/models/model_implementations/nb_sbert.py +0 -25
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
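Several of the entries above add new tasks (for example `mteb/tasks/classification/tur/turkish_constitutional_court.py` together with `mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json`). A minimal sketch of spot-checking such a task through mteb's public API, assuming the registered task name matches the descriptive-stats filename:

```python
import mteb

# Hypothetical spot-check of a task added in this release; the name
# "TurkishConstitutionalCourtViolation" is assumed from the stats filename.
task = mteb.get_task("TurkishConstitutionalCourtViolation")
print(task.metadata.name)
print(task.metadata.description)
```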
mteb/tasks/classification/eng/legal_bench_classification.py

@@ -2520,15 +2520,7 @@ class Diversity6LegalBenchClassification(AbsTaskClassification):
 class FunctionOfDecisionSectionLegalBenchClassification(AbsTaskClassification):
     metadata = TaskMetadata(
         name="FunctionOfDecisionSectionLegalBenchClassification",
-        description="""
-        1. Facts - The paragraph describes the faction background that led up to the present lawsuit.
-        2. Procedural History - The paragraph describes the course of litigation that led to the current proceeding before the court.
-        3. Issue - The paragraph describes the legal or factual issue that must be resolved by the court.
-        4. Rule - The paragraph describes a rule of law relevant to resolving the issue.
-        5. Analysis - The paragraph analyzes the legal issue by applying the relevant legal principles to the facts of the present dispute.
-        6. Conclusion - The paragraph presents a conclusion of the court.
-        7. Decree - The paragraph constitutes a decree resolving the dispute.
-        """,
+        description="The task is to classify a paragraph extracted from a written court decision into one of seven possible categories: 1. Facts - The paragraph describes the faction background that led up to the present lawsuit. 2. Procedural History - The paragraph describes the course of litigation that led to the current proceeding before the court. 3. Issue - The paragraph describes the legal or factual issue that must be resolved by the court. 4. Rule - The paragraph describes a rule of law relevant to resolving the issue. 5. Analysis - The paragraph analyzes the legal issue by applying the relevant legal principles to the facts of the present dispute. 6. Conclusion - The paragraph presents a conclusion of the court. 7. Decree - The paragraph constitutes a decree resolving the dispute.",
         reference="https://huggingface.co/datasets/nguha/legalbench",
         dataset={
             "path": "mteb/FunctionOfDecisionSectionLegalBenchClassification",
@@ -2677,8 +2669,7 @@ class JCrewBlockerLegalBenchClassification(AbsTaskClassification):
 class JCrewBlockerLegalBenchClassificationV2(AbsTaskClassification):
     metadata = TaskMetadata(
         name="JCrewBlockerLegalBenchClassification.v2",
-        description="""
-        This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
+        description="The J.Crew Blocker, also known as the J.Crew Protection, is a provision included in leveraged loan documents to prevent companies from removing security by transferring intellectual property (IP) into new subsidiaries and raising additional debt. The task consists of determining whether the J.Crew Blocker is present in the document. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
         reference="https://huggingface.co/datasets/nguha/legalbench",
         dataset={
             "path": "mteb/j_crew_blocker_legal_bench",
@@ -3454,8 +3445,7 @@ class LegalReasoningCausalityLegalBenchClassification(AbsTaskClassification):
 class LegalReasoningCausalityLegalBenchClassificationV2(AbsTaskClassification):
     metadata = TaskMetadata(
         name="LegalReasoningCausalityLegalBenchClassification.v2",
-        description="""
-        This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
+        description="Given an excerpt from a district court opinion, classify if it relies on statistical evidence in its reasoning. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
         reference="https://huggingface.co/datasets/nguha/legalbench",
         dataset={
             "path": "mteb/legal_reasoning_causality_legal_bench",
@@ -3599,44 +3589,7 @@ _MAUD_DATASET_MAP = [
 class MAUDLegalBenchClassification(AbsTaskClassification):
     metadata = TaskMetadata(
         name="MAUDLegalBenchClassification",
-        description="""
-
-        This is a combination of all 34 of the MAUD Legal Bench datasets:
-        1. MAUD Ability To Consummate Concept Is Subject To MAE Carveouts: Given an excerpt from a merger agreement and the task is to answer: is the “ability to consummate” concept subject to Material Adverse Effect (MAE) carveouts? amongst the multiple choice options.
-        2. MAUD Accuracy Of Fundamental Target RWS Bringdown Standard: Given an excerpt from a merger agreement and the task is to answer: how accurate must the fundamental representations and warranties be according to the bring down provision, amongst the multiple choice options.
-        3. MAUD Accuracy Of Target Capitalization RW Outstanding Shares Bringdown Standard Answer: Given an excerpt from a merger agreement and the task is to answer: how accurate must the fundamental representations and warranties be according to the bring down provision, amongst the multiple choice options.
-        4. MAUD Accuracy Of Target General RW Bringdown Timing Answer: Given an excerpt from a merger agreement and the task is to answer: how accurate must the fundamental representations and warranties be according to the bring down provision, amongst the multiple choice options.
-        5. MAUD Additional Matching Rights Period For Modifications Cor: Given an excerpt from a merger agreement and the task is to answer: how long is the additional matching rights period for modifications in case the board changes its recommendation, amongst the multiple choice options.
-        6. MAUD Application Of Buyer Consent Requirement Negative Interim Covenant: Given an excerpt from a merger agreement and the task is to answer: what negative covenants does the requirement of Buyer consent apply to, amongst the multiple choice options.
-        7. MAUD Buyer Consent Requirement Ordinary Course: Given an excerpt from a merger agreement and the task is to answer: in case the Buyer's consent for the acquired company's ordinary business operations is required, are there any limitations on the Buyer's right to condition, withhold, or delay their consent, amongst the multiple choice options.
-        8. MAUD Change In Law Subject To Disproportionate Impact Modifier: Given an excerpt from a merger agreement and the task is to answer: do changes in law that have disproportionate impact qualify for Material Adverse Effect (MAE), amongst the multiple choice options.
-        9. MAUD Changes In GAAP Or Other Accounting Principles Subject To Disproportionate Impact Modifier: Given an excerpt from a merger agreement and the task is to answer: do changes in GAAP or other accounting principles that have disproportionate impact qualify for Material Adverse Effect (MAE), amongst the multiple choice options.
-        10. MAUD COR Permitted In Response To Intervening Event: Given an excerpt from a merger agreement and the task is to answer: is Change of Recommendation permitted in response to an intervening event, amongst the multiple choice options.
-        11. MAUD COR Permitted With Board Fiduciary Determination Only: Given an excerpt from a merger agreement and the task is to answer: is Change of Recommendation permitted as long as the board determines that such change is required to fulfill its fiduciary obligations, amongst the multiple choice options.
-        12. MAUD COR Standard Intervening Event: Given an excerpt from a merger agreement and the task is to answer: what standard should the board follow when determining whether to change its recommendation in response to an intervening event, amongst the multiple choice options.
-        13. MAUD COR Standard Superior Offer: Given an excerpt from a merger agreement and the task is to answer: what standard should the board follow when determining whether to change its recommendation in connection with a superior offer, amongst the multiple choice options.
-        14. MAUD Definition Contains Knowledge Requirement Answer: Given an excerpt from a merger agreement and the task is to answer: what is the knowledge requirement in the definition of “Intervening Event”, amongst the multiple choice options.
-        15. MAUD Definition Includes Asset Deals: Given an excerpt from a merger agreement and the task is to answer: what qualifies as a superior offer in terms of asset deals, amongst the multiple choice options.
-        16. MAUD Definition Includes Stock Deals: Given an excerpt from a merger agreement and the task is to answer: what qualifies as a superior offer in terms of stock deals, amongst the multiple choice options.
-        17. MAUD Fiduciary Exception Board Determination Standard: Given an excerpt from a merger agreement and the task is to answer: under what circumstances could the Board take actions on a different acquisition proposal notwithstanding the no-shop provision, amongst the multiple choice options.
-        18. MAUD Fiduciary Exception Board Determination Trigger No Shop: Given an excerpt from a merger agreement and the task is to answer: what type of offer could the Board take actions on notwithstanding the no-shop provision, amongst the multiple choice options.
-        19. MAUD Financial Point Of View Is The Sole Consideration: Given an excerpt from a merger agreement and the task is to answer: is “financial point of view” the sole consideration when determining whether an offer is superior, amongst the multiple choice options.
-        20. MAUD FLS MAE Standard: Given an excerpt from a merger agreement and the task is to answer: what is the Forward Looking Standard (FLS) with respect to Material Adverse Effect (MAE), amongst the multiple choice options.
-        21. MAUD General Economic and Financial Conditions Subject To Disproportionate Impact Modifier: Given an excerpt from a merger agreement and the task is to answer: do changes caused by general economic and financial conditions that have disproportionate impact qualify for Material Adverse Effect (MAE), amongst the multiple choice options.
-        22. MAUD Includes Consistent With Past Practice: Given an excerpt from a merger agreement and the task is to answer: does the wording of the Efforts Covenant clause include “consistent with past practice”, amongst the multiple choice options.
-        23. MAUD Initial Matching Rights Period COR: Given an excerpt from a merger agreement and the task is to answer: how long is the initial matching rights period in case the board changes its recommendation, amongst the multiple choice options.
-        24. MAUD Initial Matching Rights Period FTR: Given an excerpt from a merger agreement and the task is to answer: how long is the initial matching rights period in connection with the Fiduciary Termination Right (FTR), amongst the multiple choice options.
-        25. MAUDInterveningEventRequiredToOccurAfterSigningAnswer: Given an excerpt from a merger agreement and the task is to answer: is an “Intervening Event” required to occur after signing, amongst the multiple choice options.
-        26. MAUD Knowledge Definition: Given an excerpt from a merger agreement and the task is to answer: what counts as Knowledge, amongst the multiple choice options.
-        27. MAUDLiabilityStandardForNoShopBreachByTargetNonDORepresentatives: Given an excerpt from a merger agreement and the task is to answer: what is the liability standard for no-shop breach by Target Non-D&O Representatives, amongst the multiple choice options.
-        28. MAUD Ordinary Course Efforts Standard: Given an excerpt from a merger agreement and the task is to answer: what is the efforts standard, amongst the multiple choice options.
-        29. MAUD Pandemic Or Other Public Health Event Subject To Disproportionate Impact Modifier: Given an excerpt from a merger agreement and the task is to answer: do pandemics or other public health events have to have disproportionate impact to qualify for Material Adverse Effect (MAE), amongst the multiple choice options.
-        30. MAUD Pandemic Or Other Public Health Event Specific Reference To Pandemic Related Governmental Responses Or Measures: Given an excerpt from a merger agreement and the task is to answer: is there specific reference to pandemic-related governmental responses or measures in the clause that qualifies pandemics or other public health events for Material Adverse Effect (MAE), amongst the multiple choice options.
-        31. MAUD Relational Language MAE Applies To: Given an excerpt from a merger agreement and the task is to answer: what carveouts pertaining to Material Adverse Effect (MAE) does the relational language apply to?, amongst the multiple choice options.
-        32. MAUD Specific Performance: Given an excerpt from a merger agreement and the task is to answer: what is the wording of the Specific Performance clause regarding the parties' entitlement in the event of a contractual breach, amongst the multiple choice options.
-        33. MAUD Tail Period Length: Given an excerpt from a merger agreement and the task is to answer: how long is the Tail Period, amongst the multiple choice options.
-        34. MAUD Type Of Consideration: Given an excerpt from a merger agreement and the task is to answer: what type of consideration is specified in this agreement, amongst the multiple choice options.
-        """,
+        description="This task was constructed from the MAUD dataset, which consists of over 47,000 labels across 152 merger agreements annotated to identify 92 questions in each agreement used by the 2021 American Bar Association (ABA) Public Target Deal Points Study. Each dataset is formatted as a series of multiple-choice questions, where given a segment of the merger agreement and a Deal Point question, the model is to choose the answer that best characterizes the agreement as response. This is a combination of all 34 of the MAUD Legal Bench datasets: 1. MAUD Ability To Consummate Concept Is Subject To MAE Carveouts: Given an excerpt from a merger agreement and the task is to answer: is the “ability to consummate” concept subject to Material Adverse Effect (MAE) carveouts? amongst the multiple choice options. 2. MAUD Accuracy Of Fundamental Target RWS Bringdown Standard: Given an excerpt from a merger agreement and the task is to answer: how accurate must the fundamental representations and warranties be according to the bring down provision, amongst the multiple choice options. 3. MAUD Accuracy Of Target Capitalization RW Outstanding Shares Bringdown Standard Answer: Given an excerpt from a merger agreement and the task is to answer: how accurate must the fundamental representations and warranties be according to the bring down provision, amongst the multiple choice options. 4. MAUD Accuracy Of Target General RW Bringdown Timing Answer: Given an excerpt from a merger agreement and the task is to answer: how accurate must the fundamental representations and warranties be according to the bring down provision, amongst the multiple choice options. 5. MAUD Additional Matching Rights Period For Modifications Cor: Given an excerpt from a merger agreement and the task is to answer: how long is the additional matching rights period for modifications in case the board changes its recommendation, amongst the multiple choice options. 6. MAUD Application Of Buyer Consent Requirement Negative Interim Covenant: Given an excerpt from a merger agreement and the task is to answer: what negative covenants does the requirement of Buyer consent apply to, amongst the multiple choice options. 7. MAUD Buyer Consent Requirement Ordinary Course: Given an excerpt from a merger agreement and the task is to answer: in case the Buyer's consent for the acquired company's ordinary business operations is required, are there any limitations on the Buyer's right to condition, withhold, or delay their consent, amongst the multiple choice options. 8. MAUD Change In Law Subject To Disproportionate Impact Modifier: Given an excerpt from a merger agreement and the task is to answer: do changes in law that have disproportionate impact qualify for Material Adverse Effect (MAE), amongst the multiple choice options. 9. MAUD Changes In GAAP Or Other Accounting Principles Subject To Disproportionate Impact Modifier: Given an excerpt from a merger agreement and the task is to answer: do changes in GAAP or other accounting principles that have disproportionate impact qualify for Material Adverse Effect (MAE), amongst the multiple choice options. 10. MAUD COR Permitted In Response To Intervening Event: Given an excerpt from a merger agreement and the task is to answer: is Change of Recommendation permitted in response to an intervening event, amongst the multiple choice options. 11. MAUD COR Permitted With Board Fiduciary Determination Only: Given an excerpt from a merger agreement and the task is to answer: is Change of Recommendation permitted as long as the board determines that such change is required to fulfill its fiduciary obligations, amongst the multiple choice options. 12. MAUD COR Standard Intervening Event: Given an excerpt from a merger agreement and the task is to answer: what standard should the board follow when determining whether to change its recommendation in response to an intervening event, amongst the multiple choice options. 13. MAUD COR Standard Superior Offer: Given an excerpt from a merger agreement and the task is to answer: what standard should the board follow when determining whether to change its recommendation in connection with a superior offer, amongst the multiple choice options. 14. MAUD Definition Contains Knowledge Requirement Answer: Given an excerpt from a merger agreement and the task is to answer: what is the knowledge requirement in the definition of “Intervening Event”, amongst the multiple choice options. 15. MAUD Definition Includes Asset Deals: Given an excerpt from a merger agreement and the task is to answer: what qualifies as a superior offer in terms of asset deals, amongst the multiple choice options. 16. MAUD Definition Includes Stock Deals: Given an excerpt from a merger agreement and the task is to answer: what qualifies as a superior offer in terms of stock deals, amongst the multiple choice options. 17. MAUD Fiduciary Exception Board Determination Standard: Given an excerpt from a merger agreement and the task is to answer: under what circumstances could the Board take actions on a different acquisition proposal notwithstanding the no-shop provision, amongst the multiple choice options. 18. MAUD Fiduciary Exception Board Determination Trigger No Shop: Given an excerpt from a merger agreement and the task is to answer: what type of offer could the Board take actions on notwithstanding the no-shop provision, amongst the multiple choice options. 19. MAUD Financial Point Of View Is The Sole Consideration: Given an excerpt from a merger agreement and the task is to answer: is “financial point of view” the sole consideration when determining whether an offer is superior, amongst the multiple choice options. 20. MAUD FLS MAE Standard: Given an excerpt from a merger agreement and the task is to answer: what is the Forward Looking Standard (FLS) with respect to Material Adverse Effect (MAE), amongst the multiple choice options. 21. MAUD General Economic and Financial Conditions Subject To Disproportionate Impact Modifier: Given an excerpt from a merger agreement and the task is to answer: do changes caused by general economic and financial conditions that have disproportionate impact qualify for Material Adverse Effect (MAE), amongst the multiple choice options. 22. MAUD Includes Consistent With Past Practice: Given an excerpt from a merger agreement and the task is to answer: does the wording of the Efforts Covenant clause include “consistent with past practice”, amongst the multiple choice options. 23. MAUD Initial Matching Rights Period COR: Given an excerpt from a merger agreement and the task is to answer: how long is the initial matching rights period in case the board changes its recommendation, amongst the multiple choice options. 24. MAUD Initial Matching Rights Period FTR: Given an excerpt from a merger agreement and the task is to answer: how long is the initial matching rights period in connection with the Fiduciary Termination Right (FTR), amongst the multiple choice options. 25. MAUDInterveningEventRequiredToOccurAfterSigningAnswer: Given an excerpt from a merger agreement and the task is to answer: is an “Intervening Event” required to occur after signing, amongst the multiple choice options. 26. MAUD Knowledge Definition: Given an excerpt from a merger agreement and the task is to answer: what counts as Knowledge, amongst the multiple choice options. 27. MAUDLiabilityStandardForNoShopBreachByTargetNonDORepresentatives: Given an excerpt from a merger agreement and the task is to answer: what is the liability standard for no-shop breach by Target Non-D&O Representatives, amongst the multiple choice options. 28. MAUD Ordinary Course Efforts Standard: Given an excerpt from a merger agreement and the task is to answer: what is the efforts standard, amongst the multiple choice options. 29. MAUD Pandemic Or Other Public Health Event Subject To Disproportionate Impact Modifier: Given an excerpt from a merger agreement and the task is to answer: do pandemics or other public health events have to have disproportionate impact to qualify for Material Adverse Effect (MAE), amongst the multiple choice options. 30. MAUD Pandemic Or Other Public Health Event Specific Reference To Pandemic Related Governmental Responses Or Measures: Given an excerpt from a merger agreement and the task is to answer: is there specific reference to pandemic-related governmental responses or measures in the clause that qualifies pandemics or other public health events for Material Adverse Effect (MAE), amongst the multiple choice options. 31. MAUD Relational Language MAE Applies To: Given an excerpt from a merger agreement and the task is to answer: what carveouts pertaining to Material Adverse Effect (MAE) does the relational language apply to?, amongst the multiple choice options. 32. MAUD Specific Performance: Given an excerpt from a merger agreement and the task is to answer: what is the wording of the Specific Performance clause regarding the parties' entitlement in the event of a contractual breach, amongst the multiple choice options. 33. MAUD Tail Period Length: Given an excerpt from a merger agreement and the task is to answer: how long is the Tail Period, amongst the multiple choice options. 34. MAUD Type Of Consideration: Given an excerpt from a merger agreement and the task is to answer: what type of consideration is specified in this agreement, amongst the multiple choice options.",
         reference="https://huggingface.co/datasets/nguha/legalbench",
         dataset={
             "path": "mteb/MAUDLegalBenchClassification",
@@ -3679,45 +3632,7 @@ class MAUDLegalBenchClassification(AbsTaskClassification):
 class MAUDLegalBenchClassificationV2(AbsTaskClassification):
     metadata = TaskMetadata(
         name="MAUDLegalBenchClassification.v2",
-        description="""
-
-        This is a combination of all 34 of the MAUD Legal Bench datasets:
-        1. MAUD Ability To Consummate Concept Is Subject To MAE Carveouts: Given an excerpt from a merger agreement and the task is to answer: is the “ability to consummate” concept subject to Material Adverse Effect (MAE) carveouts? amongst the multiple choice options.
-        2. MAUD Accuracy Of Fundamental Target RWS Bringdown Standard: Given an excerpt from a merger agreement and the task is to answer: how accurate must the fundamental representations and warranties be according to the bring down provision, amongst the multiple choice options.
-        3. MAUD Accuracy Of Target Capitalization RW Outstanding Shares Bringdown Standard Answer: Given an excerpt from a merger agreement and the task is to answer: how accurate must the fundamental representations and warranties be according to the bring down provision, amongst the multiple choice options.
-        4. MAUD Accuracy Of Target General RW Bringdown Timing Answer: Given an excerpt from a merger agreement and the task is to answer: how accurate must the fundamental representations and warranties be according to the bring down provision, amongst the multiple choice options.
-        5. MAUD Additional Matching Rights Period For Modifications Cor: Given an excerpt from a merger agreement and the task is to answer: how long is the additional matching rights period for modifications in case the board changes its recommendation, amongst the multiple choice options.
-        6. MAUD Application Of Buyer Consent Requirement Negative Interim Covenant: Given an excerpt from a merger agreement and the task is to answer: what negative covenants does the requirement of Buyer consent apply to, amongst the multiple choice options.
-        7. MAUD Buyer Consent Requirement Ordinary Course: Given an excerpt from a merger agreement and the task is to answer: in case the Buyer's consent for the acquired company's ordinary business operations is required, are there any limitations on the Buyer's right to condition, withhold, or delay their consent, amongst the multiple choice options.
-        8. MAUD Change In Law Subject To Disproportionate Impact Modifier: Given an excerpt from a merger agreement and the task is to answer: do changes in law that have disproportionate impact qualify for Material Adverse Effect (MAE), amongst the multiple choice options.
-        9. MAUD Changes In GAAP Or Other Accounting Principles Subject To Disproportionate Impact Modifier: Given an excerpt from a merger agreement and the task is to answer: do changes in GAAP or other accounting principles that have disproportionate impact qualify for Material Adverse Effect (MAE), amongst the multiple choice options.
-        10. MAUD COR Permitted In Response To Intervening Event: Given an excerpt from a merger agreement and the task is to answer: is Change of Recommendation permitted in response to an intervening event, amongst the multiple choice options.
-        11. MAUD COR Permitted With Board Fiduciary Determination Only: Given an excerpt from a merger agreement and the task is to answer: is Change of Recommendation permitted as long as the board determines that such change is required to fulfill its fiduciary obligations, amongst the multiple choice options.
-        12. MAUD COR Standard Intervening Event: Given an excerpt from a merger agreement and the task is to answer: what standard should the board follow when determining whether to change its recommendation in response to an intervening event, amongst the multiple choice options.
-        13. MAUD COR Standard Superior Offer: Given an excerpt from a merger agreement and the task is to answer: what standard should the board follow when determining whether to change its recommendation in connection with a superior offer, amongst the multiple choice options.
-        14. MAUD Definition Contains Knowledge Requirement Answer: Given an excerpt from a merger agreement and the task is to answer: what is the knowledge requirement in the definition of “Intervening Event”, amongst the multiple choice options.
-        15. MAUD Definition Includes Asset Deals: Given an excerpt from a merger agreement and the task is to answer: what qualifies as a superior offer in terms of asset deals, amongst the multiple choice options.
-        16. MAUD Definition Includes Stock Deals: Given an excerpt from a merger agreement and the task is to answer: what qualifies as a superior offer in terms of stock deals, amongst the multiple choice options.
-        17. MAUD Fiduciary Exception Board Determination Standard: Given an excerpt from a merger agreement and the task is to answer: under what circumstances could the Board take actions on a different acquisition proposal notwithstanding the no-shop provision, amongst the multiple choice options.
-        18. MAUD Fiduciary Exception Board Determination Trigger No Shop: Given an excerpt from a merger agreement and the task is to answer: what type of offer could the Board take actions on notwithstanding the no-shop provision, amongst the multiple choice options.
-        19. MAUD Financial Point Of View Is The Sole Consideration: Given an excerpt from a merger agreement and the task is to answer: is “financial point of view” the sole consideration when determining whether an offer is superior, amongst the multiple choice options.
-        20. MAUD FLS MAE Standard: Given an excerpt from a merger agreement and the task is to answer: what is the Forward Looking Standard (FLS) with respect to Material Adverse Effect (MAE), amongst the multiple choice options.
-        21. MAUD General Economic and Financial Conditions Subject To Disproportionate Impact Modifier: Given an excerpt from a merger agreement and the task is to answer: do changes caused by general economic and financial conditions that have disproportionate impact qualify for Material Adverse Effect (MAE), amongst the multiple choice options.
-        22. MAUD Includes Consistent With Past Practice: Given an excerpt from a merger agreement and the task is to answer: does the wording of the Efforts Covenant clause include “consistent with past practice”, amongst the multiple choice options.
-        23. MAUD Initial Matching Rights Period COR: Given an excerpt from a merger agreement and the task is to answer: how long is the initial matching rights period in case the board changes its recommendation, amongst the multiple choice options.
-        24. MAUD Initial Matching Rights Period FTR: Given an excerpt from a merger agreement and the task is to answer: how long is the initial matching rights period in connection with the Fiduciary Termination Right (FTR), amongst the multiple choice options.
-        25. MAUDInterveningEventRequiredToOccurAfterSigningAnswer: Given an excerpt from a merger agreement and the task is to answer: is an “Intervening Event” required to occur after signing, amongst the multiple choice options.
-        26. MAUD Knowledge Definition: Given an excerpt from a merger agreement and the task is to answer: what counts as Knowledge, amongst the multiple choice options.
-        27. MAUDLiabilityStandardForNoShopBreachByTargetNonDORepresentatives: Given an excerpt from a merger agreement and the task is to answer: what is the liability standard for no-shop breach by Target Non-D&O Representatives, amongst the multiple choice options.
-        28. MAUD Ordinary Course Efforts Standard: Given an excerpt from a merger agreement and the task is to answer: what is the efforts standard, amongst the multiple choice options.
-        29. MAUD Pandemic Or Other Public Health Event Subject To Disproportionate Impact Modifier: Given an excerpt from a merger agreement and the task is to answer: do pandemics or other public health events have to have disproportionate impact to qualify for Material Adverse Effect (MAE), amongst the multiple choice options.
-        30. MAUD Pandemic Or Other Public Health Event Specific Reference To Pandemic Related Governmental Responses Or Measures: Given an excerpt from a merger agreement and the task is to answer: is there specific reference to pandemic-related governmental responses or measures in the clause that qualifies pandemics or other public health events for Material Adverse Effect (MAE), amongst the multiple choice options.
-        31. MAUD Relational Language MAE Applies To: Given an excerpt from a merger agreement and the task is to answer: what carveouts pertaining to Material Adverse Effect (MAE) does the relational language apply to?, amongst the multiple choice options.
-        32. MAUD Specific Performance: Given an excerpt from a merger agreement and the task is to answer: what is the wording of the Specific Performance clause regarding the parties' entitlement in the event of a contractual breach, amongst the multiple choice options.
-        33. MAUD Tail Period Length: Given an excerpt from a merger agreement and the task is to answer: how long is the Tail Period, amongst the multiple choice options.
-        34. MAUD Type Of Consideration: Given an excerpt from a merger agreement and the task is to answer: what type of consideration is specified in this agreement, amongst the multiple choice options.
-
-        This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
+        description="This task was constructed from the MAUD dataset, which consists of over 47,000 labels across 152 merger agreements annotated to identify 92 questions in each agreement used by the 2021 American Bar Association (ABA) Public Target Deal Points Study. Each dataset is formatted as a series of multiple-choice questions, where given a segment of the merger agreement and a Deal Point question, the model is to choose the answer that best characterizes the agreement as response. This is a combination of all 34 of the MAUD Legal Bench datasets: 1. MAUD Ability To Consummate Concept Is Subject To MAE Carveouts: Given an excerpt from a merger agreement and the task is to answer: is the “ability to consummate” concept subject to Material Adverse Effect (MAE) carveouts? amongst the multiple choice options. 2. MAUD Accuracy Of Fundamental Target RWS Bringdown Standard: Given an excerpt from a merger agreement and the task is to answer: how accurate must the fundamental representations and warranties be according to the bring down provision, amongst the multiple choice options. 3. MAUD Accuracy Of Target Capitalization RW Outstanding Shares Bringdown Standard Answer: Given an excerpt from a merger agreement and the task is to answer: how accurate must the fundamental representations and warranties be according to the bring down provision, amongst the multiple choice options. 4. MAUD Accuracy Of Target General RW Bringdown Timing Answer: Given an excerpt from a merger agreement and the task is to answer: how accurate must the fundamental representations and warranties be according to the bring down provision, amongst the multiple choice options. 5. MAUD Additional Matching Rights Period For Modifications Cor: Given an excerpt from a merger agreement and the task is to answer: how long is the additional matching rights period for modifications in case the board changes its recommendation, amongst the multiple choice options. 6. MAUD Application Of Buyer Consent Requirement Negative Interim Covenant: Given an excerpt from a merger agreement and the task is to answer: what negative covenants does the requirement of Buyer consent apply to, amongst the multiple choice options. 7. MAUD Buyer Consent Requirement Ordinary Course: Given an excerpt from a merger agreement and the task is to answer: in case the Buyer's consent for the acquired company's ordinary business operations is required, are there any limitations on the Buyer's right to condition, withhold, or delay their consent, amongst the multiple choice options. 8. MAUD Change In Law Subject To Disproportionate Impact Modifier: Given an excerpt from a merger agreement and the task is to answer: do changes in law that have disproportionate impact qualify for Material Adverse Effect (MAE), amongst the multiple choice options. 9. MAUD Changes In GAAP Or Other Accounting Principles Subject To Disproportionate Impact Modifier: Given an excerpt from a merger agreement and the task is to answer: do changes in GAAP or other accounting principles that have disproportionate impact qualify for Material Adverse Effect (MAE), amongst the multiple choice options. 10. MAUD COR Permitted In Response To Intervening Event: Given an excerpt from a merger agreement and the task is to answer: is Change of Recommendation permitted in response to an intervening event, amongst the multiple choice options. 11. 
MAUD COR Permitted With Board Fiduciary Determination Only: Given an excerpt from a merger agreement and the task is to answer: is Change of Recommendation permitted as long as the board determines that such change is required to fulfill its fiduciary obligations, amongst the multiple choice options. 12. MAUD COR Standard Intervening Event: Given an excerpt from a merger agreement and the task is to answer: what standard should the board follow when determining whether to change its recommendation in response to an intervening event, amongst the multiple choice options. 13. MAUD COR Standard Superior Offer: Given an excerpt from a merger agreement and the task is to answer: what standard should the board follow when determining whether to change its recommendation in connection with a superior offer, amongst the multiple choice options. 14. MAUD Definition Contains Knowledge Requirement Answer: Given an excerpt from a merger agreement and the task is to answer: what is the knowledge requirement in the definition of “Intervening Event”, amongst the multiple choice options. 15. MAUD Definition Includes Asset Deals: Given an excerpt from a merger agreement and the task is to answer: what qualifies as a superior offer in terms of asset deals, amongst the multiple choice options. 16. MAUD Definition Includes Stock Deals: Given an excerpt from a merger agreement and the task is to answer: what qualifies as a superior offer in terms of stock deals, amongst the multiple choice options. 17. MAUD Fiduciary Exception Board Determination Standard: Given an excerpt from a merger agreement and the task is to answer: under what circumstances could the Board take actions on a different acquisition proposal notwithstanding the no-shop provision, amongst the multiple choice options. 18. MAUD Fiduciary Exception Board Determination Trigger No Shop: Given an excerpt from a merger agreement and the task is to answer: what type of offer could the Board take actions on notwithstanding the no-shop provision, amongst the multiple choice options. 19. MAUD Financial Point Of View Is The Sole Consideration: Given an excerpt from a merger agreement and the task is to answer: is “financial point of view” the sole consideration when determining whether an offer is superior, amongst the multiple choice options. 20. MAUD FLS MAE Standard: Given an excerpt from a merger agreement and the task is to answer: what is the Forward Looking Standard (FLS) with respect to Material Adverse Effect (MAE), amongst the multiple choice options. 21. MAUD General Economic and Financial Conditions Subject To Disproportionate Impact Modifier: Given an excerpt from a merger agreement and the task is to answer: do changes caused by general economic and financial conditions that have disproportionate impact qualify for Material Adverse Effect (MAE), amongst the multiple choice options. 22. MAUD Includes Consistent With Past Practice: Given an excerpt from a merger agreement and the task is to answer: does the wording of the Efforts Covenant clause include “consistent with past practice”, amongst the multiple choice options. 23. MAUD Initial Matching Rights Period COR: Given an excerpt from a merger agreement and the task is to answer: how long is the initial matching rights period in case the board changes its recommendation, amongst the multiple choice options. 24. 
MAUD Initial Matching Rights Period FTR: Given an excerpt from a merger agreement and the task is to answer: how long is the initial matching rights period in connection with the Fiduciary Termination Right (FTR), amongst the multiple choice options. 25. MAUDInterveningEventRequiredToOccurAfterSigningAnswer: Given an excerpt from a merger agreement and the task is to answer: is an “Intervening Event” required to occur after signing, amongst the multiple choice options. 26. MAUD Knowledge Definition: Given an excerpt from a merger agreement and the task is to answer: what counts as Knowledge, amongst the multiple choice options. 27. MAUDLiabilityStandardForNoShopBreachByTargetNonDORepresentatives: Given an excerpt from a merger agreement and the task is to answer: what is the liability standard for no-shop breach by Target Non-D&O Representatives, amongst the multiple choice options. 28. MAUD Ordinary Course Efforts Standard: Given an excerpt from a merger agreement and the task is to answer: what is the efforts standard, amongst the multiple choice options. 29. MAUD Pandemic Or Other Public Health Event Subject To Disproportionate Impact Modifier: Given an excerpt from a merger agreement and the task is to answer: do pandemics or other public health events have to have disproportionate impact to qualify for Material Adverse Effect (MAE), amongst the multiple choice options. 30. MAUD Pandemic Or Other Public Health Event Specific Reference To Pandemic Related Governmental Responses Or Measures: Given an excerpt from a merger agreement and the task is to answer: is there specific reference to pandemic-related governmental responses or measures in the clause that qualifies pandemics or other public health events for Material Adverse Effect (MAE), amongst the multiple choice options. 31. MAUD Relational Language MAE Applies To: Given an excerpt from a merger agreement and the task is to answer: what carveouts pertaining to Material Adverse Effect (MAE) does the relational language apply to?, amongst the multiple choice options. 32. MAUD Specific Performance: Given an excerpt from a merger agreement and the task is to answer: what is the wording of the Specific Performance clause regarding the parties' entitlement in the event of a contractual breach, amongst the multiple choice options. 33. MAUD Tail Period Length: Given an excerpt from a merger agreement and the task is to answer: how long is the Tail Period, amongst the multiple choice options. 34. MAUD Type Of Consideration: Given an excerpt from a merger agreement and the task is to answer: what type of consideration is specified in this agreement, amongst the multiple choice options. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
         reference="https://huggingface.co/datasets/nguha/legalbench",
         dataset={
             "path": "mteb/maud_legal_bench",
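The corrected v2 task keeps its registry name but now points at the renamed `mteb/maud_legal_bench` dataset, with the 34-part description flattened onto one line. A minimal sketch (not part of this diff) of inspecting the corrected metadata after upgrading, assuming the public `mteb.get_task` helper:

import mteb

# Look the task up by its registered name; the dataset path and the
# flattened description both live on the task's TaskMetadata object.
task = mteb.get_task("MAUDLegalBenchClassification.v2")
print(task.metadata.dataset["path"])   # expected: "mteb/maud_legal_bench"
print(task.metadata.description[:80])  # start of the one-line description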
@@ -3882,8 +3797,7 @@ class OPP115DataSecurityLegalBenchClassification(AbsTaskClassification):
 class OPP115DataSecurityLegalBenchClassificationV2(AbsTaskClassification):
     metadata = TaskMetadata(
         name="OPP115DataSecurityLegalBenchClassification.v2",
-        description="""
-        This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
+        description="Given a clause from a privacy policy, classify if the clause describes how user information is protected. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
         reference="https://huggingface.co/datasets/nguha/legalbench",
         dataset={
             "path": "mteb/opp115_data_security_legal_bench",
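The same two-line fix repeats through the hunks below: each affected `.v2` task previously carried only the correction note as its description, and the new string prepends the base task description before the note on a single line. A hypothetical helper (not part of mteb) illustrating how such strings could be composed:

# Hypothetical composition helper, assuming nothing beyond the standard library.
CORRECTION_NOTE = (
    "This version corrects errors found in the original data. For details, "
    "see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)"
)

def v2_description(base: str) -> str:
    # Collapse newlines and indentation so the result stays on one line.
    return " ".join(base.split()) + " " + CORRECTION_NOTE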
@@ -3971,8 +3885,7 @@ class OPP115DoNotTrackLegalBenchClassification(AbsTaskClassification):
 class OPP115DoNotTrackLegalBenchClassificationV2(AbsTaskClassification):
     metadata = TaskMetadata(
         name="OPP115DoNotTrackLegalBenchClassification.v2",
-        description="""
-        This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
+        description="Given a clause from a privacy policy, classify if the clause describes if and how Do Not Track signals for online tracking and advertising are honored. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
         reference="https://huggingface.co/datasets/nguha/legalbench",
         dataset={
             "path": "mteb/opp115_do_not_track_legal_bench",
@@ -4277,8 +4190,7 @@ class OPP115UserChoiceControlLegalBenchClassification(AbsTaskClassification):
 class OPP115UserChoiceControlLegalBenchClassificationV2(AbsTaskClassification):
     metadata = TaskMetadata(
         name="OPP115UserChoiceControlLegalBenchClassification.v2",
-        description="""
-        This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
+        description="Given a clause from a privacy policy, classify if the clause describes the choices and control options available to users. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
         reference="https://huggingface.co/datasets/nguha/legalbench",
         dataset={
             "path": "mteb/opp115_user_choice_control_legal_bench",
@@ -4322,15 +4234,7 @@ class OPP115UserChoiceControlLegalBenchClassificationV2(AbsTaskClassification):
 class OralArgumentQuestionPurposeLegalBenchClassification(AbsTaskClassification):
     metadata = TaskMetadata(
         name="OralArgumentQuestionPurposeLegalBenchClassification",
-        description="""This task classifies questions asked by Supreme Court justices at oral argument into seven categories:
-        1. Background - questions seeking factual or procedural information that is missing or not clear in the briefing
-        2. Clarification - questions seeking to get an advocate to clarify her position or the scope of the rule being advocated for
-        3. Implications - questions about the limits of a rule or its implications for future cases
-        4. Support - questions offering support for the advocate’s position
-        5. Criticism - questions criticizing an advocate’s position
-        6. Communicate - question designed primarily to communicate with other justices
-        7. Humor - questions designed to interject humor into the argument and relieve tension
-        """,
+        description="This task classifies questions asked by Supreme Court justices at oral argument into seven categories: 1. Background - questions seeking factual or procedural information that is missing or not clear in the briefing 2. Clarification - questions seeking to get an advocate to clarify her position or the scope of the rule being advocated for 3. Implications - questions about the limits of a rule or its implications for future cases 4. Support - questions offering support for the advocate’s position 5. Criticism - questions criticizing an advocate’s position 6. Communicate - question designed primarily to communicate with other justices 7. Humor - questions designed to interject humor into the argument and relieve tension",
         reference="https://huggingface.co/datasets/nguha/legalbench",
         dataset={
             "path": "mteb/OralArgumentQuestionPurposeLegalBenchClassification",
@@ -4366,16 +4270,7 @@ class OralArgumentQuestionPurposeLegalBenchClassification(AbsTaskClassification)
 class OralArgumentQuestionPurposeLegalBenchClassificationV2(AbsTaskClassification):
     metadata = TaskMetadata(
         name="OralArgumentQuestionPurposeLegalBenchClassification.v2",
-        description="""
-        1. Background - questions seeking factual or procedural information that is missing or not clear in the briefing
-        2. Clarification - questions seeking to get an advocate to clarify her position or the scope of the rule being advocated for
-        3. Implications - questions about the limits of a rule or its implications for future cases
-        4. Support - questions offering support for the advocate’s position
-        5. Criticism - questions criticizing an advocate’s position
-        6. Communicate - question designed primarily to communicate with other justices
-        7. Humor - questions designed to interject humor into the argument and relieve tension
-
-        This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
+        description="This task classifies questions asked by Supreme Court justices at oral argument into seven categories: 1. Background - questions seeking factual or procedural information that is missing or not clear in the briefing 2. Clarification - questions seeking to get an advocate to clarify her position or the scope of the rule being advocated for 3. Implications - questions about the limits of a rule or its implications for future cases 4. Support - questions offering support for the advocate’s position 5. Criticism - questions criticizing an advocate’s position 6. Communicate - question designed primarily to communicate with other justices 7. Humor - questions designed to interject humor into the argument and relieve tension This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
         reference="https://huggingface.co/datasets/nguha/legalbench",
         dataset={
             "path": "mteb/oral_argument_question_purpose_legal_bench",
@@ -4411,7 +4306,7 @@ class OralArgumentQuestionPurposeLegalBenchClassificationV2(AbsTaskClassificatio
 class OverrulingLegalBenchClassification(AbsTaskClassification):
     metadata = TaskMetadata(
         name="OverrulingLegalBenchClassification",
-        description="
+        description="This task consists of classifying whether or not a particular sentence of case law overturns the decision of a previous case.",
         reference="https://huggingface.co/datasets/nguha/legalbench",
         dataset={
             "path": "mteb/OverrulingLegalBenchClassification",
@@ -4455,8 +4350,7 @@ class OverrulingLegalBenchClassification(AbsTaskClassification):
 class OverrulingLegalBenchClassificationV2(AbsTaskClassification):
     metadata = TaskMetadata(
         name="OverrulingLegalBenchClassification.v2",
-        description="""
-        This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
+        description="This task consists of classifying whether or not a particular sentence of case law overturns the decision of a previous case. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
         reference="https://huggingface.co/datasets/nguha/legalbench",
         dataset={
             "path": "mteb/overruling_legal_bench",
@@ -4500,7 +4394,7 @@ class OverrulingLegalBenchClassificationV2(AbsTaskClassification):
 class PersonalJurisdictionLegalBenchClassification(AbsTaskClassification):
     metadata = TaskMetadata(
         name="PersonalJurisdictionLegalBenchClassification",
-        description="
+        description="Given a fact pattern describing the set of contacts between a plaintiff, defendant, and forum, determine if a court in that forum could exercise personal jurisdiction over the defendant.",
         reference="https://huggingface.co/datasets/nguha/legalbench",
         dataset={
             "path": "mteb/PersonalJurisdictionLegalBenchClassification",
@@ -4535,7 +4429,7 @@ class PersonalJurisdictionLegalBenchClassification(AbsTaskClassification):
 class PROALegalBenchClassification(AbsTaskClassification):
     metadata = TaskMetadata(
         name="PROALegalBenchClassification",
-        description="
+        description="Given a statute, determine if the text contains an explicit private right of action. Given a privacy policy clause and a description of the clause, determine if the description is correct. A private right of action (PROA) exists when a statute empowers an ordinary individual (i.e., a private person) to legally enforce their rights by bringing an action in court. In short, a PROA creates the ability for an individual to sue someone in order to recover damages or halt some offending conduct. PROAs are ubiquitous in antitrust law (in which individuals harmed by anti-competitive behavior can sue offending firms for compensation) and environmental law (in which individuals can sue entities which release hazardous substances for damages).",
         reference="https://huggingface.co/datasets/nguha/legalbench",
         dataset={
             "path": "mteb/PROALegalBenchClassification",
@@ -47,8 +47,7 @@ class NewsClassification(AbsTaskClassification):
 class NewsClassificationV2(AbsTaskClassification):
     metadata = TaskMetadata(
         name="NewsClassification.v2",
-        description="""
-        This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
+        description="Large News Classification Dataset This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
         dataset={
             "path": "mteb/news",
             "revision": "7c1f485c1f43d6aef852c5df6db23b047991a8e7",
@@ -10,7 +10,7 @@ class PatchCamelyonClassification(AbsTaskClassification):

     metadata = TaskMetadata(
         name="PatchCamelyon",
-        description="
+        description="Histopathology diagnosis classification dataset.",
         reference="https://link.springer.com/chapter/10.1007/978-3-030-00934-2_24",
         dataset={
             "path": "clip-benchmark/wds_vtab-pcam",
@@ -50,8 +50,7 @@ M{\`a}rquez, Llu{\'\i}s},
 class PatentClassificationV2(AbsTaskClassification):
     metadata = TaskMetadata(
         name="PatentClassification.v2",
-        description="""
-        This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
+        description="Classification Dataset of Patents and Abstract This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
         dataset={
             "path": "mteb/patent",
             "revision": "f5e5c81286448c68264300fe1e6f3de599922890",
@@ -41,8 +41,7 @@ class PoemSentimentClassification(AbsTaskClassification):
 class PoemSentimentClassificationV2(AbsTaskClassification):
     metadata = TaskMetadata(
         name="PoemSentimentClassification.v2",
-        description="""
-        This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
+        description="Poem Sentiment is a sentiment dataset of poem verses from Project Gutenberg. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
         reference="https://arxiv.org/abs/2011.02686",
         dataset={
             "path": "mteb/poem_sentiment",
@@ -47,8 +47,7 @@ class SDSEyeProtectionClassification(AbsTaskClassification):
 class SDSEyeProtectionClassificationV2(AbsTaskClassification):
     metadata = TaskMetadata(
         name="SDSEyeProtectionClassification.v2",
-        description="""
-        This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
+        description="ChemTEB evaluates the performance of text embedding models on chemical domain data. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
         reference="https://arxiv.org/abs/2412.00532",
         dataset={
             "path": "mteb/sds_eye_protection",
@@ -47,8 +47,7 @@ class SDSGlovesClassification(AbsTaskClassification):
 class SDSGlovesClassificationV2(AbsTaskClassification):
     metadata = TaskMetadata(
         name="SDSGlovesClassification.v2",
-        description="""
-        This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
+        description="ChemTEB evaluates the performance of text embedding models on chemical domain data. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
         reference="https://arxiv.org/abs/2412.00532",
         dataset={
             "path": "mteb/sds_gloves",
@@ -7,15 +7,7 @@ _EVAL_SPLITS = ["test"]
 class ToxicChatClassification(AbsTaskClassification):
     metadata = TaskMetadata(
         name="ToxicChatClassification",
-        description="""This dataset contains toxicity annotations on 10K user
-        prompts collected from the Vicuna online demo. We utilize a human-AI
-        collaborative annotation framework to guarantee the quality of annotation
-        while maintaining a feasible annotation workload. The details of data
-        collection, pre-processing, and annotation can be found in our paper.
-        We believe that ToxicChat can be a valuable resource to drive further
-        advancements toward building a safe and healthy environment for user-AI
-        interactions.
-        Only human annotated samples are selected here.""",
+        description="This dataset contains toxicity annotations on 10K user prompts collected from the Vicuna online demo. We utilize a human-AI collaborative annotation framework to guarantee the quality of annotation while maintaining a feasible annotation workload. The details of data collection, pre-processing, and annotation can be found in our paper. We believe that ToxicChat can be a valuable resource to drive further advancements toward building a safe and healthy environment for user-AI interactions. Only human annotated samples are selected here.",
         reference="https://aclanthology.org/2023.findings-emnlp.311/",
         dataset={
             "path": "lmsys/toxic-chat",
@@ -68,16 +60,7 @@ class ToxicChatClassification(AbsTaskClassification):
 class ToxicChatClassificationV2(AbsTaskClassification):
     metadata = TaskMetadata(
         name="ToxicChatClassification.v2",
-        description="""
-        prompts collected from the Vicuna online demo. We utilize a human-AI
-        collaborative annotation framework to guarantee the quality of annotation
-        while maintaining a feasible annotation workload. The details of data
-        collection, pre-processing, and annotation can be found in our paper.
-        We believe that ToxicChat can be a valuable resource to drive further
-        advancements toward building a safe and healthy environment for user-AI
-        interactions.
-        Only human annotated samples are selected here.
-        This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
+        description="This dataset contains toxicity annotations on 10K user prompts collected from the Vicuna online demo. We utilize a human-AI collaborative annotation framework to guarantee the quality of annotation while maintaining a feasible annotation workload. The details of data collection, pre-processing, and annotation can be found in our paper. We believe that ToxicChat can be a valuable resource to drive further advancements toward building a safe and healthy environment for user-AI interactions. Only human annotated samples are selected here. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
         reference="https://aclanthology.org/2023.findings-emnlp.311/",
         dataset={
             "path": "mteb/toxic_chat",
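Since these hunks only touch descriptions and dataset paths, code that resolves tasks by name is unaffected. A minimal end-to-end sketch, assuming the v2-style `mteb.evaluate` entry point (older code used the `mteb.MTEB` runner, now kept in `deprecated_evaluator.py`):

import mteb

# Resolve the renamed task by name and evaluate a small encoder on it.
tasks = mteb.get_tasks(tasks=["ToxicChatClassification.v2"])
model = mteb.get_model("sentence-transformers/all-MiniLM-L6-v2")
results = mteb.evaluate(model, tasks)  # assumption: v2-style evaluate()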
@@ -51,8 +51,7 @@ class ToxicConversationsClassification(AbsTaskClassification):
 class ToxicConversationsClassificationV2(AbsTaskClassification):
     metadata = TaskMetadata(
         name="ToxicConversationsClassification.v2",
-        description="""
-        This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
+        description="Collection of comments from the Civil Comments platform together with annotations if the comment is toxic or not. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
         reference="https://www.kaggle.com/competitions/jigsaw-unintended-bias-in-toxicity-classification/overview",
         dataset={
             "path": "mteb/toxic_conversations",
@@ -46,8 +46,7 @@ class TweetSentimentExtractionClassification(AbsTaskClassification):
 class TweetSentimentExtractionClassificationV2(AbsTaskClassification):
     metadata = TaskMetadata(
         name="TweetSentimentExtractionClassification.v2",
-        description="""
-        This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
+        description="This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
         reference="https://www.kaggle.com/competitions/tweet-sentiment-extraction/overview",
         dataset={
             "path": "mteb/tweet_sentiment_extraction",
@@ -5,12 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class TweetTopicSingleClassification(AbsTaskClassification):
     metadata = TaskMetadata(
         name="TweetTopicSingleClassification",
-        description="""Topic classification dataset on Twitter with 6 labels. Each instance of
-        TweetTopic comes with a timestamp which distributes from September 2019 to August 2021.
-        Tweets were preprocessed before the annotation to normalize some artifacts, converting
-        URLs into a special token {{URL}} and non-verified usernames into {{USERNAME}}. For verified
-        usernames, we replace its display name (or account name) with symbols {@}.
-        """,
+        description="Topic classification dataset on Twitter with 6 labels. Each instance of TweetTopic comes with a timestamp which distributes from September 2019 to August 2021. Tweets were preprocessed before the annotation to normalize some artifacts, converting URLs into a special token {{URL}} and non-verified usernames into {{USERNAME}}. For verified usernames, we replace its display name (or account name) with symbols {@}.",
         dataset={
             "path": "mteb/TweetTopicSingleClassification",
             "revision": "b4280e921a2760ce34d2dd80a9e5dc8bcbf61785",
@@ -55,13 +50,7 @@ Barbieri, Francesco},
 class TweetTopicSingleClassificationV2(AbsTaskClassification):
     metadata = TaskMetadata(
         name="TweetTopicSingleClassification.v2",
-        description="""
-        TweetTopic comes with a timestamp which distributes from September 2019 to August 2021.
-        Tweets were preprocessed before the annotation to normalize some artifacts, converting
-        URLs into a special token {{URL}} and non-verified usernames into {{USERNAME}}. For verified
-        usernames, we replace its display name (or account name) with symbols {@}.
-
-        This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
+        description="Topic classification dataset on Twitter with 6 labels. Each instance of TweetTopic comes with a timestamp which distributes from September 2019 to August 2021. Tweets were preprocessed before the annotation to normalize some artifacts, converting URLs into a special token {{URL}} and non-verified usernames into {{USERNAME}}. For verified usernames, we replace its display name (or account name) with symbols {@}. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
         dataset={
             "path": "mteb/tweet_topic_single",
             "revision": "a7904e26081f987da81ad2cc063e09e714e875d0",
@@ -9,11 +9,7 @@ class UCF101Classification(AbsTaskClassification):

     metadata = TaskMetadata(
         name="UCF101",
-        description="""UCF101 is an action recognition data set of realistic
-        action videos collected from YouTube, having 101 action categories. This
-        version of the dataset does not contain images but images saved frame by
-        frame. Train and test splits are generated based on the authors' first
-        version train/test list.""",
+        description="UCF101 is an action recognition data set of realistic action videos collected from YouTube, having 101 action categories. This version of the dataset does not contain images but images saved frame by frame. Train and test splits are generated based on the authors' first version train/test list.",
         reference="https://huggingface.co/datasets/flwrlabs/ucf101",
         dataset={
             "path": "flwrlabs/ucf101",
@@ -39,8 +39,7 @@ class WikipediaBioMetChemClassification(AbsTaskClassification):
 class WikipediaBioMetChemClassificationV2(AbsTaskClassification):
     metadata = TaskMetadata(
         name="WikipediaBioMetChemClassification.v2",
-        description="""
-        This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
+        description="ChemTEB evaluates the performance of text embedding models on chemical domain data. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
         reference="https://arxiv.org/abs/2412.00532",
         dataset={
             "path": "mteb/wikipedia_bio_met_chem",
@@ -39,8 +39,7 @@ class WikipediaChemFieldsClassification(AbsTaskClassification):
 class WikipediaChemFieldsClassificationV2(AbsTaskClassification):
     metadata = TaskMetadata(
         name="WikipediaChemFieldsClassification.v2",
-        description="""
-        This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
+        description="ChemTEB evaluates the performance of text embedding models on chemical domain data. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
         reference="https://arxiv.org/abs/2412.00532",
         dataset={
             "path": "mteb/wikipedia_chem_fields",
@@ -39,8 +39,7 @@ class WikipediaCompChemSpectroscopyClassification(AbsTaskClassification):
 class WikipediaCompChemSpectroscopyClassificationV2(AbsTaskClassification):
     metadata = TaskMetadata(
         name="WikipediaCompChemSpectroscopyClassification.v2",
-        description="""
-        This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
+        description="ChemTEB evaluates the performance of text embedding models on chemical domain data. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
         reference="https://arxiv.org/abs/2412.00532",
         dataset={
             "path": "mteb/wikipedia_comp_chem_spectroscopy",
@@ -39,8 +39,7 @@ class WikipediaCrystallographyAnalyticalClassification(AbsTaskClassification):
 class WikipediaCrystallographyAnalyticalClassificationV2(AbsTaskClassification):
     metadata = TaskMetadata(
         name="WikipediaCrystallographyAnalyticalClassification.v2",
-        description="""
-        This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
+        description="ChemTEB evaluates the performance of text embedding models on chemical domain data. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
         reference="https://arxiv.org/abs/2412.00532",
         dataset={
             "path": "mteb/wikipedia_crystallography_analytical",
@@ -39,8 +39,7 @@ class WikipediaTheoreticalAppliedClassification(AbsTaskClassification):
 class WikipediaTheoreticalAppliedClassificationV2(AbsTaskClassification):
     metadata = TaskMetadata(
         name="WikipediaTheoreticalAppliedClassification.v2",
-        description="""
-        This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
+        description="ChemTEB evaluates the performance of text embedding models on chemical domain data. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
         reference="https://arxiv.org/abs/2412.00532",
         dataset={
             "path": "mteb/wikipedia_theoretical_applied",
@@ -46,8 +46,7 @@ class YahooAnswersTopicsClassification(AbsTaskClassification):
 class YahooAnswersTopicsClassificationV2(AbsTaskClassification):
     metadata = TaskMetadata(
         name="YahooAnswersTopicsClassification.v2",
-        description="""
-        This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
+        description="Dataset composed of questions and answers from Yahoo Answers, categorized into topics. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
         reference="https://huggingface.co/datasets/yahoo_answers_topics",
         dataset={
             "path": "mteb/yahoo_answers_topics",
@@ -51,8 +51,7 @@ class YelpReviewFullClassification(AbsTaskClassification):
 class YelpReviewFullClassificationV2(AbsTaskClassification):
     metadata = TaskMetadata(
         name="YelpReviewFullClassification.v2",
-        description="""
-        This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
+        description="Yelp Review Full is a dataset for sentiment analysis, containing 5 classes corresponding to ratings 1-5. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
         reference="https://arxiv.org/abs/1509.01626",
         dataset={
             "path": "mteb/yelp_review_full",
@@ -45,7 +45,7 @@ class EstonianValenceClassification(AbsTaskClassification):
             "valence", "label"
         )
         # convert label to a numbers
-        labels = self.dataset["train"]["label"]
+        labels = self.dataset["train"]["label"]
         lab2idx = {lab: idx for idx, lab in enumerate(set(labels))}
         self.dataset = self.dataset.map(
             lambda x: {"label": lab2idx[x["label"]]}, remove_columns=["label"]
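The whitespace-only hunk above also shows the label-encoding idiom these classification tasks use: string labels are mapped to integer ids with a dict comprehension before the dataset is re-mapped. Note that iterating a `set` has no stable order across Python runs, so the id assignment is not reproducible; a standalone sketch of the same idea with a deterministic variant:

# Standalone sketch of the lab2idx idiom; sorting the unique labels
# first makes the label-to-id mapping stable across runs.
labels = ["positive", "negative", "neutral", "positive"]
lab2idx = {lab: idx for idx, lab in enumerate(sorted(set(labels)))}
encoded = [lab2idx[lab] for lab in labels]
print(lab2idx)   # {'negative': 0, 'neutral': 1, 'positive': 2}
print(encoded)   # [2, 0, 1, 2]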
@@ -61,8 +61,7 @@ class EstonianValenceClassificationV2(AbsTaskClassification):
             "path": "mteb/estonian_valence",
             "revision": "8795961e2af5b83bcb8a6928636845ac2b92f92e",
         },
-        description="""
-        This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
+        description="Dataset containing annotated Estonian news data from the Postimees and Õhtuleht newspapers. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
         reference="https://figshare.com/articles/dataset/Estonian_Valence_Corpus_Eesti_valentsikorpus/24517054",
         type="Classification",
         category="t2c",