mteb 2.1.4__py3-none-any.whl → 2.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/__init__.py +6 -0
- mteb/_create_dataloaders.py +22 -20
- mteb/_evaluators/any_sts_evaluator.py +23 -14
- mteb/_evaluators/classification_metrics.py +54 -0
- mteb/_evaluators/clustering_evaluator.py +3 -3
- mteb/_evaluators/evaluator.py +4 -2
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +18 -11
- mteb/_evaluators/pair_classification_evaluator.py +34 -40
- mteb/_evaluators/retrieval_evaluator.py +2 -2
- mteb/_evaluators/retrieval_metrics.py +18 -17
- mteb/_evaluators/sklearn_evaluator.py +25 -37
- mteb/_evaluators/text/bitext_mining_evaluator.py +31 -19
- mteb/_evaluators/text/summarization_evaluator.py +27 -20
- mteb/_evaluators/zeroshot_classification_evaluator.py +7 -5
- mteb/abstasks/_data_filter/__init__.py +0 -0
- mteb/abstasks/_data_filter/filters.py +125 -0
- mteb/abstasks/_data_filter/task_pipelines.py +105 -0
- mteb/abstasks/_statistics_calculation.py +23 -11
- mteb/abstasks/_stratification.py +18 -18
- mteb/abstasks/abstask.py +35 -28
- mteb/abstasks/aggregate_task_metadata.py +1 -9
- mteb/abstasks/aggregated_task.py +10 -29
- mteb/abstasks/classification.py +15 -12
- mteb/abstasks/clustering.py +20 -16
- mteb/abstasks/clustering_legacy.py +13 -10
- mteb/abstasks/image/image_text_pair_classification.py +7 -4
- mteb/abstasks/multilabel_classification.py +33 -22
- mteb/abstasks/pair_classification.py +27 -11
- mteb/abstasks/regression.py +4 -4
- mteb/abstasks/retrieval.py +28 -24
- mteb/abstasks/retrieval_dataset_loaders.py +2 -2
- mteb/abstasks/sts.py +14 -4
- mteb/abstasks/task_metadata.py +32 -33
- mteb/abstasks/text/bitext_mining.py +39 -28
- mteb/abstasks/text/reranking.py +8 -6
- mteb/abstasks/text/summarization.py +10 -5
- mteb/abstasks/zeroshot_classification.py +8 -4
- mteb/benchmarks/_create_table.py +84 -37
- mteb/benchmarks/benchmark.py +77 -16
- mteb/benchmarks/benchmarks/__init__.py +12 -0
- mteb/benchmarks/benchmarks/benchmarks.py +361 -16
- mteb/benchmarks/get_benchmark.py +14 -53
- mteb/cache.py +227 -37
- mteb/cli/_display_tasks.py +2 -2
- mteb/cli/build_cli.py +110 -14
- mteb/cli/generate_model_card.py +43 -23
- mteb/deprecated_evaluator.py +71 -62
- mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
- mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
- mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
- mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
- mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
- mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
- mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
- mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
- mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
- mteb/evaluate.py +106 -75
- mteb/filter_tasks.py +25 -26
- mteb/get_tasks.py +29 -30
- mteb/languages/language_scripts.py +5 -3
- mteb/leaderboard/app.py +414 -151
- mteb/leaderboard/benchmark_selector.py +14 -5
- mteb/leaderboard/figures.py +13 -15
- mteb/leaderboard/table.py +82 -17
- mteb/load_results.py +12 -12
- mteb/models/__init__.py +4 -1
- mteb/models/abs_encoder.py +31 -23
- mteb/models/cache_wrappers/__init__.py +2 -1
- mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +7 -6
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
- mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
- mteb/models/cache_wrappers/cache_wrapper.py +3 -3
- mteb/models/get_model_meta.py +25 -118
- mteb/models/instruct_wrapper.py +33 -9
- mteb/models/model_implementations/align_models.py +8 -1
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +65 -0
- mteb/models/model_implementations/ara_models.py +9 -1
- mteb/models/model_implementations/arctic_models.py +16 -8
- mteb/models/model_implementations/b1ade_models.py +2 -1
- mteb/models/model_implementations/bedrock_models.py +4 -0
- mteb/models/model_implementations/bge_models.py +101 -17
- mteb/models/model_implementations/bica_model.py +35 -0
- mteb/models/model_implementations/blip2_models.py +13 -2
- mteb/models/model_implementations/blip_models.py +43 -16
- mteb/models/model_implementations/bm25.py +5 -4
- mteb/models/model_implementations/bmretriever_models.py +10 -4
- mteb/models/model_implementations/cadet_models.py +10 -1
- mteb/models/model_implementations/cde_models.py +25 -4
- mteb/models/model_implementations/clip_models.py +9 -6
- mteb/models/model_implementations/clips_models.py +100 -0
- mteb/models/model_implementations/codefuse_models.py +165 -3
- mteb/models/model_implementations/codesage_models.py +18 -3
- mteb/models/model_implementations/cohere_models.py +13 -6
- mteb/models/model_implementations/cohere_v.py +7 -2
- mteb/models/model_implementations/colpali_models.py +17 -9
- mteb/models/model_implementations/colqwen_models.py +275 -5
- mteb/models/model_implementations/colsmol_models.py +4 -2
- mteb/models/model_implementations/conan_models.py +2 -1
- mteb/models/model_implementations/dino_models.py +194 -23
- mteb/models/model_implementations/e5_instruct.py +27 -4
- mteb/models/model_implementations/e5_models.py +21 -110
- mteb/models/model_implementations/e5_v.py +7 -6
- mteb/models/model_implementations/eagerworks_models.py +164 -0
- mteb/models/model_implementations/emillykkejensen_models.py +91 -0
- mteb/models/model_implementations/en_code_retriever.py +2 -1
- mteb/models/model_implementations/euler_models.py +32 -0
- mteb/models/model_implementations/evaclip_models.py +4 -0
- mteb/models/model_implementations/fa_models.py +67 -9
- mteb/models/model_implementations/facebookai.py +205 -0
- mteb/models/model_implementations/geogpt_models.py +2 -1
- mteb/models/model_implementations/gme_v_models.py +17 -10
- mteb/models/model_implementations/google_models.py +17 -6
- mteb/models/model_implementations/granite_vision_embedding_models.py +8 -3
- mteb/models/model_implementations/gritlm_models.py +4 -2
- mteb/models/model_implementations/gte_models.py +99 -9
- mteb/models/model_implementations/hinvec_models.py +2 -1
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +36 -6
- mteb/models/model_implementations/inf_models.py +4 -2
- mteb/models/model_implementations/jasper_models.py +256 -3
- mteb/models/model_implementations/jina_clip.py +49 -10
- mteb/models/model_implementations/jina_models.py +222 -11
- mteb/models/model_implementations/kalm_models.py +203 -25
- mteb/models/model_implementations/kblab.py +37 -0
- mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
- mteb/models/model_implementations/kfst.py +25 -0
- mteb/models/model_implementations/kowshik24_models.py +32 -0
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +2 -1
- mteb/models/model_implementations/linq_models.py +4 -3
- mteb/models/model_implementations/listconranker.py +2 -2
- mteb/models/model_implementations/llm2clip_models.py +9 -6
- mteb/models/model_implementations/llm2vec_models.py +16 -8
- mteb/models/model_implementations/mcinext_models.py +7 -1
- mteb/models/model_implementations/mdbr_models.py +19 -3
- mteb/models/model_implementations/misc_models.py +422 -60
- mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
- mteb/models/model_implementations/mme5_models.py +2 -1
- mteb/models/model_implementations/moco_models.py +15 -4
- mteb/models/model_implementations/mod_models.py +191 -0
- mteb/models/model_implementations/model2vec_models.py +27 -14
- mteb/models/model_implementations/moka_models.py +4 -1
- mteb/models/model_implementations/nbailab.py +70 -0
- mteb/models/model_implementations/no_instruct_sentence_models.py +3 -2
- mteb/models/model_implementations/nomic_models.py +173 -6
- mteb/models/model_implementations/nomic_models_vision.py +8 -3
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +32 -19
- mteb/models/model_implementations/nvidia_models.py +155 -20
- mteb/models/model_implementations/octen_models.py +254 -0
- mteb/models/model_implementations/openai_models.py +20 -16
- mteb/models/model_implementations/openclip_models.py +37 -13
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -5
- mteb/models/model_implementations/ops_moa_models.py +5 -3
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
- mteb/models/model_implementations/pawan_models.py +39 -0
- mteb/models/model_implementations/piccolo_models.py +9 -1
- mteb/models/model_implementations/pixie_models.py +56 -0
- mteb/models/model_implementations/promptriever_models.py +12 -8
- mteb/models/model_implementations/pylate_models.py +46 -12
- mteb/models/model_implementations/qodo_models.py +4 -2
- mteb/models/model_implementations/qtack_models.py +2 -1
- mteb/models/model_implementations/qwen3_models.py +9 -6
- mteb/models/model_implementations/qzhou_models.py +5 -3
- mteb/models/model_implementations/random_baseline.py +19 -24
- mteb/models/model_implementations/rasgaard_models.py +34 -0
- mteb/models/model_implementations/reasonir_model.py +2 -1
- mteb/models/model_implementations/repllama_models.py +5 -3
- mteb/models/model_implementations/rerankers_custom.py +15 -9
- mteb/models/model_implementations/rerankers_monot5_based.py +31 -31
- mteb/models/model_implementations/richinfoai_models.py +2 -1
- mteb/models/model_implementations/ru_sentence_models.py +71 -20
- mteb/models/model_implementations/ruri_models.py +322 -0
- mteb/models/model_implementations/salesforce_models.py +6 -3
- mteb/models/model_implementations/samilpwc_models.py +2 -1
- mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
- mteb/models/model_implementations/searchmap_models.py +2 -1
- mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +625 -0
- mteb/models/model_implementations/seed_models.py +1 -0
- mteb/models/model_implementations/sentence_transformers_models.py +177 -18
- mteb/models/model_implementations/shuu_model.py +32 -31
- mteb/models/model_implementations/siglip_models.py +30 -20
- mteb/models/model_implementations/slm_models.py +416 -0
- mteb/models/model_implementations/sonar_models.py +1 -0
- mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
- mteb/models/model_implementations/stella_models.py +23 -4
- mteb/models/model_implementations/tarka_models.py +376 -0
- mteb/models/model_implementations/text2vec_models.py +9 -3
- mteb/models/model_implementations/ua_sentence_models.py +11 -1
- mteb/models/model_implementations/uae_models.py +8 -1
- mteb/models/model_implementations/vdr_models.py +3 -1
- mteb/models/model_implementations/vi_vn_models.py +45 -6
- mteb/models/model_implementations/vista_models.py +2 -0
- mteb/models/model_implementations/vlm2vec_models.py +5 -3
- mteb/models/model_implementations/voyage_models.py +99 -0
- mteb/models/model_implementations/voyage_v.py +17 -9
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +2 -1
- mteb/models/model_implementations/yuan_models.py +34 -0
- mteb/models/model_implementations/yuan_models_en.py +58 -0
- mteb/models/model_meta.py +498 -29
- mteb/models/models_protocols.py +22 -6
- mteb/models/search_encoder_index/__init__.py +7 -0
- mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
- mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +160 -0
- mteb/models/search_wrappers.py +197 -65
- mteb/models/sentence_transformer_wrapper.py +52 -32
- mteb/models/vllm_wrapper.py +327 -0
- mteb/py.typed +0 -0
- mteb/results/benchmark_results.py +114 -65
- mteb/results/model_result.py +63 -26
- mteb/results/task_result.py +117 -77
- mteb/similarity_functions.py +60 -7
- mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
- mteb/tasks/classification/ara/ajgt.py +1 -2
- mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
- mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
- mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
- mteb/tasks/classification/dan/dk_hate_classification.py +2 -3
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
- mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
- mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
- mteb/tasks/classification/eng/arxiv_classification.py +1 -2
- mteb/tasks/classification/eng/banking77_classification.py +1 -2
- mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
- mteb/tasks/classification/eng/emotion_classification.py +1 -2
- mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
- mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
- mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
- mteb/tasks/classification/eng/imdb_classification.py +1 -2
- mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
- mteb/tasks/classification/eng/news_classification.py +1 -2
- mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
- mteb/tasks/classification/eng/patent_classification.py +1 -2
- mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
- mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
- mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
- mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
- mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
- mteb/tasks/classification/eng/ucf101_classification.py +1 -5
- mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
- mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
- mteb/tasks/classification/est/estonian_valence.py +2 -3
- mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
- mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
- mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
- mteb/tasks/classification/fra/french_book_reviews.py +1 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
- mteb/tasks/classification/heb/__init__.py +6 -1
- mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
- mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
- mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
- mteb/tasks/classification/jpn/wrime_classification.py +1 -2
- mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
- mteb/tasks/classification/kor/klue_tc.py +1 -2
- mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +3 -4
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
- mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
- mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
- mteb/tasks/classification/multilingual/scala_classification.py +2 -3
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
- mteb/tasks/classification/mya/myanmar_news.py +1 -2
- mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
- mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
- mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
- mteb/tasks/classification/nld/iconclass_classification.py +3 -0
- mteb/tasks/classification/nld/open_tender_classification.py +3 -0
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
- mteb/tasks/classification/nob/no_rec_classification.py +1 -2
- mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
- mteb/tasks/classification/ory/odia_news_classification.py +1 -2
- mteb/tasks/classification/pol/polish_classification.py +3 -6
- mteb/tasks/classification/ron/moroco.py +1 -2
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
- mteb/tasks/classification/rus/georeview_classification.py +1 -2
- mteb/tasks/classification/rus/headline_classification.py +1 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
- mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
- mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
- mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
- mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
- mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
- mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
- mteb/tasks/classification/swe/dalaj_classification.py +1 -2
- mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
- mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
- mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
- mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
- mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
- mteb/tasks/classification/tur/__init__.py +4 -0
- mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
- mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
- mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
- mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
- mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
- mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
- mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
- mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
- mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
- mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
- mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
- mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
- mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
- mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
- mteb/tasks/classification/zho/cmteb_classification.py +5 -10
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
- mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
- mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
- mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
- mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
- mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
- mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
- mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
- mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
- mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
- mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
- mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
- mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
- mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
- mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
- mteb/tasks/pair_classification/rus/__init__.py +2 -2
- mteb/tasks/pair_classification/rus/terra.py +51 -25
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
- mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
- mteb/tasks/reranking/jpn/__init__.py +9 -1
- mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
- mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
- mteb/tasks/reranking/multilingual/__init__.py +2 -0
- mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
- mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
- mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
- mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
- mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
- mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
- mteb/tasks/retrieval/code/code_rag.py +12 -12
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
- mteb/tasks/retrieval/eng/__init__.py +2 -0
- mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
- mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
- mteb/tasks/retrieval/jpn/__init__.py +8 -0
- mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
- mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
- mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
- mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
- mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
- mteb/tasks/retrieval/kor/__init__.py +16 -1
- mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
- mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
- mteb/tasks/retrieval/multilingual/__init__.py +24 -0
- mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
- mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
- mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
- mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
- mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +389 -0
- mteb/tasks/retrieval/nld/__init__.py +8 -4
- mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
- mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
- mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
- mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
- mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
- mteb/tasks/retrieval/nob/norquad.py +2 -2
- mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
- mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
- mteb/tasks/retrieval/vie/__init__.py +14 -6
- mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +40 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +40 -5
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +40 -7
- mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +40 -6
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +49 -5
- mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +40 -5
- mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
- mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
- mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
- mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
- mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
- mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
- mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
- mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
- mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
- mteb/types/__init__.py +2 -0
- mteb/types/_encoder_io.py +19 -2
- mteb/types/_result.py +2 -1
- mteb/types/statistics.py +9 -3
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/METADATA +25 -8
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/RECORD +525 -438
- mteb/models/model_implementations/mxbai_models.py +0 -102
- mteb/models/model_implementations/nb_sbert.py +0 -25
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
mteb/similarity_functions.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import torch
|
|
2
2
|
|
|
3
3
|
from mteb.models import EncoderProtocol
|
|
4
|
+
from mteb.models.model_meta import ScoringFunction
|
|
4
5
|
from mteb.types import Array
|
|
5
6
|
|
|
6
7
|
|
|
@@ -38,6 +39,54 @@ def compute_pairwise_similarity(
|
|
|
38
39
|
return pairwise_cos_sim(embedding1, embedding2)
|
|
39
40
|
|
|
40
41
|
|
|
42
|
+
def select_similarity(
|
|
43
|
+
embedding1: Array,
|
|
44
|
+
embedding2: Array,
|
|
45
|
+
similarity_fn: ScoringFunction,
|
|
46
|
+
) -> Array:
|
|
47
|
+
"""Compute similarity between two sets of embeddings using the specified similarity function.
|
|
48
|
+
|
|
49
|
+
Args:
|
|
50
|
+
embedding1: The first set of embeddings.
|
|
51
|
+
embedding2: The second set of embeddings.
|
|
52
|
+
similarity_fn: The similarity function to use (COSINE, DOT_PRODUCT, EUCLIDEAN).
|
|
53
|
+
|
|
54
|
+
Returns:
|
|
55
|
+
Array: The computed similarity scores.
|
|
56
|
+
"""
|
|
57
|
+
if similarity_fn is ScoringFunction.COSINE:
|
|
58
|
+
return cos_sim(embedding1, embedding2)
|
|
59
|
+
elif similarity_fn is ScoringFunction.DOT_PRODUCT:
|
|
60
|
+
return dot_score(embedding1, embedding2)
|
|
61
|
+
elif similarity_fn is ScoringFunction.EUCLIDEAN:
|
|
62
|
+
return euclidean_sim(embedding1, embedding2)
|
|
63
|
+
raise ValueError(f"Unsupported similarity function: {similarity_fn}")
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def select_pairwise_similarity(
|
|
67
|
+
embedding1: Array,
|
|
68
|
+
embedding2: Array,
|
|
69
|
+
similarity_fn: ScoringFunction,
|
|
70
|
+
) -> Array:
|
|
71
|
+
"""Compute pairwise similarity between two sets of embeddings using the specified similarity function.
|
|
72
|
+
|
|
73
|
+
Args:
|
|
74
|
+
embedding1: The first set of embeddings.
|
|
75
|
+
embedding2: The second set of embeddings.
|
|
76
|
+
similarity_fn: The similarity function to use (COSINE, DOT_PRODUCT, EUCLIDEAN).
|
|
77
|
+
|
|
78
|
+
Returns:
|
|
79
|
+
Array: The computed pairwise similarity scores.
|
|
80
|
+
"""
|
|
81
|
+
if similarity_fn is ScoringFunction.COSINE:
|
|
82
|
+
return pairwise_cos_sim(embedding1, embedding2)
|
|
83
|
+
elif similarity_fn is ScoringFunction.DOT_PRODUCT:
|
|
84
|
+
return pairwise_dot_score(embedding1, embedding2)
|
|
85
|
+
elif similarity_fn is ScoringFunction.EUCLIDEAN:
|
|
86
|
+
return pairwise_euclidean_sim(embedding1, embedding2)
|
|
87
|
+
raise ValueError(f"Unsupported similarity function: {similarity_fn}")
|
|
88
|
+
|
|
89
|
+
|
|
41
90
|
def _normalize_embeddings(embeddings: Array) -> torch.Tensor:
|
|
42
91
|
"""Normalizes the embeddings matrix, so that each sentence embedding has unit length.
|
|
43
92
|
|
|
@@ -137,7 +186,7 @@ def max_sim(a: Array, b: Array) -> torch.Tensor:
|
|
|
137
186
|
b,
|
|
138
187
|
)
|
|
139
188
|
|
|
140
|
-
return scores.max(axis=-1).values.sum(axis=-1)
|
|
189
|
+
return scores.max(axis=-1).values.sum(axis=-1) # type: ignore[call-overload]
|
|
141
190
|
|
|
142
191
|
|
|
143
192
|
# https://github.com/lightonai/pylate/blob/2d094a724866d6e15701781528368438081c0157/pylate/scores/scores.py#L67C1-L122C38
|
|
@@ -168,7 +217,7 @@ def pairwise_max_sim(
|
|
|
168
217
|
document_embedding,
|
|
169
218
|
)
|
|
170
219
|
|
|
171
|
-
scores.append(query_document_score.max(axis=-1).values.sum())
|
|
220
|
+
scores.append(query_document_score.max(axis=-1).values.sum()) # type: ignore[call-overload]
|
|
172
221
|
|
|
173
222
|
return torch.stack(scores, dim=0)
|
|
174
223
|
|
|
@@ -268,11 +317,15 @@ def similarity(text_embeddings: Array, input_embeddings: Array) -> Array:
|
|
|
268
317
|
Returns:
|
|
269
318
|
Matrix with similarities
|
|
270
319
|
"""
|
|
271
|
-
|
|
272
|
-
|
|
320
|
+
text_embeddings_tensor = _convert_to_tensor(text_embeddings)
|
|
321
|
+
input_embeddings_tensor = _convert_to_tensor(input_embeddings)
|
|
273
322
|
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
323
|
+
text_embeddings_tensor = text_embeddings_tensor / text_embeddings_tensor.norm(
|
|
324
|
+
dim=-1, keepdim=True
|
|
325
|
+
)
|
|
326
|
+
input_embeddings_tensor = input_embeddings_tensor / input_embeddings_tensor.norm(
|
|
327
|
+
dim=-1, keepdim=True
|
|
328
|
+
)
|
|
329
|
+
logits = torch.matmul(input_embeddings_tensor, text_embeddings_tensor.T)
|
|
277
330
|
probs = (logits * 100).softmax(dim=-1)
|
|
278
331
|
return probs
|
|
@@ -16,7 +16,7 @@ from .nusa_translation_bitext_mining import NusaTranslationBitextMining
|
|
|
16
16
|
from .nusa_x_bitext_mining import NusaXBitextMining
|
|
17
17
|
from .phinc_bitext_mining import PhincBitextMining
|
|
18
18
|
from .roma_tales_bitext_mining import RomaTalesBitextMining
|
|
19
|
-
from .ru_sci_bench_bitext_mining import RuSciBenchBitextMining
|
|
19
|
+
from .ru_sci_bench_bitext_mining import RuSciBenchBitextMining, RuSciBenchBitextMiningV2
|
|
20
20
|
from .tatoeba_bitext_mining import TatoebaBitextMining
|
|
21
21
|
from .web_faq_bitext_mining import WebFAQBitextMiningQAs, WebFAQBitextMiningQuestions
|
|
22
22
|
|
|
@@ -40,6 +40,7 @@ __all__ = [
|
|
|
40
40
|
"PhincBitextMining",
|
|
41
41
|
"RomaTalesBitextMining",
|
|
42
42
|
"RuSciBenchBitextMining",
|
|
43
|
+
"RuSciBenchBitextMiningV2",
|
|
43
44
|
"TatoebaBitextMining",
|
|
44
45
|
"WebFAQBitextMiningQAs",
|
|
45
46
|
"WebFAQBitextMiningQuestions",
|
|
@@ -23,7 +23,7 @@ class BUCCBitextMining(AbsTaskBitextMining):
|
|
|
23
23
|
"path": "mteb/BUCC",
|
|
24
24
|
"revision": "414572247440f0ccacf7eb0bb70a31533a0e5443",
|
|
25
25
|
},
|
|
26
|
-
description="BUCC bitext mining dataset",
|
|
26
|
+
description="BUCC bitext mining dataset train split.",
|
|
27
27
|
reference="https://comparable.limsi.fr/bucc2018/bucc2018-task.html",
|
|
28
28
|
type="BitextMining",
|
|
29
29
|
category="t2t",
|
|
@@ -71,7 +71,9 @@ Rapp, Reinhard},
|
|
|
71
71
|
|
|
72
72
|
sentence1 = data["sentence1"][0]
|
|
73
73
|
sentence2 = data["sentence2"][0]
|
|
74
|
-
sentence1 = [
|
|
74
|
+
sentence1 = [
|
|
75
|
+
sentence1[i] for (i, j) in gold
|
|
76
|
+
] # keep only sentences in gold. The 2nd value is meant for sentence2 but not used here. This is fixed in BUCC.v2.
|
|
75
77
|
logger.info(f"Lang {lang} num gold {len(gold)}")
|
|
76
78
|
logger.info(f"Lang {lang} num sentence1 {len(sentence1)}")
|
|
77
79
|
logger.info(f"Lang {lang} num sentence2 {len(sentence2)}")
|
|
@@ -20,7 +20,7 @@ class BUCCBitextMiningFast(AbsTaskBitextMining):
|
|
|
20
20
|
"path": "mteb/bucc-bitext-mining",
|
|
21
21
|
"revision": "1739dc11ffe9b7bfccd7f3d585aeb4c544fc6677",
|
|
22
22
|
},
|
|
23
|
-
description="BUCC bitext mining dataset",
|
|
23
|
+
description="BUCC bitext mining dataset train split, gold set only.",
|
|
24
24
|
reference="https://comparable.limsi.fr/bucc2018/bucc2018-task.html",
|
|
25
25
|
type="BitextMining",
|
|
26
26
|
category="t2t",
|
|
@@ -10,11 +10,53 @@ class RuSciBenchBitextMining(AbsTaskBitextMining):
|
|
|
10
10
|
"path": "mlsa-iai-msu-lab/ru_sci_bench_bitext_mining",
|
|
11
11
|
"revision": "e5840033c5cf2573932db027ac8001fe0a7eb6fa",
|
|
12
12
|
},
|
|
13
|
-
description="
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
13
|
+
description="This task focuses on finding translations of scientific articles. The dataset is sourced from eLibrary, Russia's largest electronic library of scientific publications. Russian authors often provide English translations for their abstracts and titles, and the data consists of these paired titles and abstracts. The task evaluates a model's ability to match an article's Russian title and abstract to its English counterpart, or vice versa.",
|
|
14
|
+
reference="https://github.com/mlsa-iai-msu-lab/ru_sci_bench_mteb",
|
|
15
|
+
type="BitextMining",
|
|
16
|
+
category="t2c",
|
|
17
|
+
modalities=["text"],
|
|
18
|
+
eval_splits=["test"],
|
|
19
|
+
eval_langs={
|
|
20
|
+
"ru-en": ["rus-Cyrl", "eng-Latn"],
|
|
21
|
+
"en-ru": ["eng-Latn", "rus-Cyrl"],
|
|
22
|
+
},
|
|
23
|
+
main_score="f1",
|
|
24
|
+
date=("2007-01-01", "2023-01-01"),
|
|
25
|
+
domains=["Academic", "Non-fiction", "Written"],
|
|
26
|
+
task_subtypes=[],
|
|
27
|
+
license="not specified",
|
|
28
|
+
dialect=[],
|
|
29
|
+
sample_creation="found",
|
|
30
|
+
annotations_creators="derived",
|
|
31
|
+
bibtex_citation=r"""
|
|
32
|
+
@article{vatolin2024ruscibench,
|
|
33
|
+
author = {Vatolin, A. and Gerasimenko, N. and Ianina, A. and Vorontsov, K.},
|
|
34
|
+
doi = {10.1134/S1064562424602191},
|
|
35
|
+
issn = {1531-8362},
|
|
36
|
+
journal = {Doklady Mathematics},
|
|
37
|
+
month = {12},
|
|
38
|
+
number = {1},
|
|
39
|
+
pages = {S251--S260},
|
|
40
|
+
title = {RuSciBench: Open Benchmark for Russian and English Scientific Document Representations},
|
|
41
|
+
url = {https://doi.org/10.1134/S1064562424602191},
|
|
42
|
+
volume = {110},
|
|
43
|
+
year = {2024},
|
|
44
|
+
}
|
|
45
|
+
""",
|
|
46
|
+
prompt="Given the following title and abstract of the scientific article, find its translation",
|
|
47
|
+
superseded_by="RuSciBenchBitextMining.v2",
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class RuSciBenchBitextMiningV2(AbsTaskBitextMining):
|
|
52
|
+
fast_loading = True
|
|
53
|
+
metadata = TaskMetadata(
|
|
54
|
+
name="RuSciBenchBitextMining.v2",
|
|
55
|
+
dataset={
|
|
56
|
+
"path": "mlsa-iai-msu-lab/ru_sci_bench_bitext_mining",
|
|
57
|
+
"revision": "20e815e8ac8787331546386dfd177821510f79a3",
|
|
58
|
+
},
|
|
59
|
+
description="This task focuses on finding translations of scientific articles. The dataset is sourced from eLibrary, Russia's largest electronic library of scientific publications. Russian authors often provide English translations for their abstracts and titles, and the data consists of these paired titles and abstracts. The task evaluates a model's ability to match an article's Russian title and abstract to its English counterpart, or vice versa. Compared to the previous version, 6 erroneous examples have been removed.",
|
|
18
60
|
reference="https://github.com/mlsa-iai-msu-lab/ru_sci_bench_mteb",
|
|
19
61
|
type="BitextMining",
|
|
20
62
|
category="t2c",
|
|
@@ -198,9 +198,7 @@ _SPLITS = ["default"]
|
|
|
198
198
|
class WebFAQBitextMiningQuestions(AbsTaskBitextMining):
|
|
199
199
|
metadata = TaskMetadata(
|
|
200
200
|
name="WebFAQBitextMiningQuestions",
|
|
201
|
-
description=
|
|
202
|
-
A sentence in the "WebFAQBitextMiningQuestions" task is the question originating from an aligned QA.
|
|
203
|
-
The dataset is sourced from FAQ pages on the web.""",
|
|
201
|
+
description='The WebFAQ Bitext Dataset consists of natural FAQ-style Question-Answer pairs that align across languages. A sentence in the "WebFAQBitextMiningQuestions" task is the question originating from an aligned QA. The dataset is sourced from FAQ pages on the web.',
|
|
204
202
|
reference="https://huggingface.co/PaDaS-Lab",
|
|
205
203
|
dataset={
|
|
206
204
|
"path": "PaDaS-Lab/webfaq-bitexts",
|
|
@@ -254,9 +252,7 @@ The dataset is sourced from FAQ pages on the web.""",
|
|
|
254
252
|
class WebFAQBitextMiningQAs(AbsTaskBitextMining):
|
|
255
253
|
metadata = TaskMetadata(
|
|
256
254
|
name="WebFAQBitextMiningQAs",
|
|
257
|
-
description=
|
|
258
|
-
A sentence in the "WebFAQBitextMiningQAs" task is a concatenation of a question and its corresponding answer.
|
|
259
|
-
The dataset is sourced from FAQ pages on the web.""",
|
|
255
|
+
description='The WebFAQ Bitext Dataset consists of natural FAQ-style Question-Answer pairs that align across languages. A sentence in the "WebFAQBitextMiningQAs" task is a concatenation of a question and its corresponding answer. The dataset is sourced from FAQ pages on the web.',
|
|
260
256
|
reference="https://huggingface.co/PaDaS-Lab",
|
|
261
257
|
dataset={
|
|
262
258
|
"path": "PaDaS-Lab/webfaq-bitexts",
|
|
@@ -45,8 +45,7 @@ class AJGTV2(AbsTaskClassification):
|
|
|
45
45
|
"path": "mteb/ajgt",
|
|
46
46
|
"revision": "0a3dea7301ee0c051891f04d32f3e8577a9eae36",
|
|
47
47
|
},
|
|
48
|
-
description="
|
|
49
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)""",
|
|
48
|
+
description="Arabic Jordanian General Tweets (AJGT) Corpus consisted of 1,800 tweets (900 for training and 900 for testing) annotated as positive and negative. Modern Standard Arabic (MSA) or Jordanian dialect. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)",
|
|
50
49
|
reference="https://link.springer.com/chapter/10.1007/978-3-319-60042-0_66/",
|
|
51
50
|
type="Classification",
|
|
52
51
|
category="t2c",
|
|
@@ -45,8 +45,7 @@ class HotelReviewSentimentClassificationV2(AbsTaskClassification):
|
|
|
45
45
|
"path": "mteb/HotelReviewSentimentClassification",
|
|
46
46
|
"revision": "f5e6a24acbed4182114ffdf46747090b3f51e836",
|
|
47
47
|
},
|
|
48
|
-
description="
|
|
49
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)""",
|
|
48
|
+
description="HARD is a dataset of Arabic hotel reviews collected from the Booking.com website. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)",
|
|
50
49
|
reference="https://link.springer.com/chapter/10.1007/978-3-319-67056-0_3",
|
|
51
50
|
type="Classification",
|
|
52
51
|
category="t2c",
|
|
@@ -41,8 +41,7 @@ class OnlineStoreReviewSentimentClassificationV2(AbsTaskClassification):
|
|
|
41
41
|
"path": "mteb/online_store_review_sentiment",
|
|
42
42
|
"revision": "de0e8eed65adf1cbc58f8743a5f5c5df556de4c4",
|
|
43
43
|
},
|
|
44
|
-
description="
|
|
45
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)""",
|
|
44
|
+
description="This dataset contains Arabic reviews of products from the SHEIN online store. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)",
|
|
46
45
|
reference="https://huggingface.co/datasets/Ruqiya/Arabic_Reviews_of_SHEIN",
|
|
47
46
|
type="Classification",
|
|
48
47
|
category="t2c",
|
|
@@ -52,8 +52,7 @@ class RestaurantReviewSentimentClassificationV2(AbsTaskClassification):
|
|
|
52
52
|
"path": "mteb/restaurant_review_sentiment",
|
|
53
53
|
"revision": "5d28c1e8fb393173a849696ed178b90a6f78754a",
|
|
54
54
|
},
|
|
55
|
-
description="
|
|
56
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)""",
|
|
55
|
+
description="Dataset of 8156 restaurant reviews from qaym.com in Arabic for sentiment analysis This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)",
|
|
57
56
|
reference="https://link.springer.com/chapter/10.1007/978-3-319-18117-2_2",
|
|
58
57
|
type="Classification",
|
|
59
58
|
category="t2c",
|
|
@@ -45,8 +45,7 @@ class TweetEmotionClassificationV2(AbsTaskClassification):
|
|
|
45
45
|
"path": "mteb/TweetEmotionClassification",
|
|
46
46
|
"revision": "930d65840c089406ceed5241b1a9ba7294e5eeae",
|
|
47
47
|
},
|
|
48
|
-
description="
|
|
49
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)""",
|
|
48
|
+
description="A dataset of 10,012 tweets that was created with the aim of covering the most frequently used emotion categories in Arabic tweets. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)",
|
|
50
49
|
reference="https://link.springer.com/chapter/10.1007/978-3-319-77116-8_8",
|
|
51
50
|
type="Classification",
|
|
52
51
|
category="t2c",
|
|
@@ -62,8 +62,7 @@ class TweetSarcasmClassificationV2(AbsTaskClassification):
|
|
|
62
62
|
"path": "mteb/tweet_sarcasm",
|
|
63
63
|
"revision": "3a20898e2ea3303844e907d55f7a815a7644150d",
|
|
64
64
|
},
|
|
65
|
-
description="
|
|
66
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)""",
|
|
65
|
+
description="Arabic sarcasm detection dataset, which was created through the reannotation of available Arabic sentiment analysis datasets. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)",
|
|
67
66
|
reference="https://aclanthology.org/2020.osact-1.5/",
|
|
68
67
|
type="Classification",
|
|
69
68
|
category="t2c",
|
|
@@ -55,8 +55,7 @@ Islam, Tanvir},
|
|
|
55
55
|
class BengaliDocumentClassificationV2(AbsTaskClassification):
|
|
56
56
|
metadata = TaskMetadata(
|
|
57
57
|
name="BengaliDocumentClassification.v2",
|
|
58
|
-
description="
|
|
59
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)""",
|
|
58
|
+
description="Dataset for News Classification, categorized with 13 domains. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)",
|
|
60
59
|
reference="https://aclanthology.org/2023.eacl-main.4",
|
|
61
60
|
dataset={
|
|
62
61
|
"path": "mteb/bengali_document",
|
|
@@ -45,8 +45,7 @@ class BengaliHateSpeechClassification(AbsTaskClassification):
|
|
|
45
45
|
class BengaliHateSpeechClassificationV2(AbsTaskClassification):
|
|
46
46
|
metadata = TaskMetadata(
|
|
47
47
|
name="BengaliHateSpeechClassification.v2",
|
|
48
|
-
description="
|
|
49
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)""",
|
|
48
|
+
description="The Bengali Hate Speech Dataset is a Bengali-language dataset of news articles collected from various Bengali media sources and categorized based on the type of hate in the text. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)",
|
|
50
49
|
reference="https://huggingface.co/datasets/bn_hate_speech",
|
|
51
50
|
dataset={
|
|
52
51
|
"path": "mteb/bengali_hate_speech",
|
|
@@ -45,8 +45,7 @@ class BengaliSentimentAnalysis(AbsTaskClassification):
|
|
|
45
45
|
class BengaliSentimentAnalysisV2(AbsTaskClassification):
|
|
46
46
|
metadata = TaskMetadata(
|
|
47
47
|
name="BengaliSentimentAnalysis.v2",
|
|
48
|
-
description="
|
|
49
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)""",
|
|
48
|
+
description="dataset contains 2854 Negative reviews and 7238 Positive reviews collected and manually annotated from Youtube Bengali drama. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2632)",
|
|
50
49
|
reference="https://data.mendeley.com/datasets/p6zc7krs37/4",
|
|
51
50
|
dataset={
|
|
52
51
|
"path": "mteb/bengali_sentiment_analysis",
|
|
@@ -51,8 +51,7 @@ class CSFDCZMovieReviewSentimentClassification(AbsTaskClassification):
|
|
|
51
51
|
class CSFDCZMovieReviewSentimentClassificationV2(AbsTaskClassification):
|
|
52
52
|
metadata = TaskMetadata(
|
|
53
53
|
name="CSFDCZMovieReviewSentimentClassification.v2",
|
|
54
|
-
description="
|
|
55
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
|
|
54
|
+
description="The dataset contains 30k user reviews from csfd.cz in Czech. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
|
|
56
55
|
reference="https://arxiv.org/abs/2304.01922",
|
|
57
56
|
dataset={
|
|
58
57
|
"path": "mteb/csfdcz_movie_review_sentiment",
|
|
@@ -58,8 +58,7 @@ Montoyo, Andres},
|
|
|
58
58
|
class CzechProductReviewSentimentClassificationV2(AbsTaskClassification):
|
|
59
59
|
metadata = TaskMetadata(
|
|
60
60
|
name="CzechProductReviewSentimentClassification.v2",
|
|
61
|
-
description="
|
|
62
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
|
|
61
|
+
description="User reviews of products on Czech e-shop Mall.cz with 3 sentiment classes (positive, neutral, negative) This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
|
|
63
62
|
reference="https://aclanthology.org/W13-1609/",
|
|
64
63
|
dataset={
|
|
65
64
|
"path": "mteb/czech_product_review_sentiment",
|
|
@@ -55,8 +55,7 @@ Montoyo, Andres},
|
|
|
55
55
|
class CzechSoMeSentimentClassificationV2(AbsTaskClassification):
|
|
56
56
|
metadata = TaskMetadata(
|
|
57
57
|
name="CzechSoMeSentimentClassification.v2",
|
|
58
|
-
description="
|
|
59
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
|
|
58
|
+
description="User comments on Facebook This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
|
|
60
59
|
reference="https://aclanthology.org/W13-1609/",
|
|
61
60
|
dataset={
|
|
62
61
|
"path": "mteb/czech_so_me_sentiment",
|
|
@@ -47,8 +47,7 @@ class AngryTweetsClassificationV2(AbsTaskClassification):
|
|
|
47
47
|
"path": "mteb/angry_tweets",
|
|
48
48
|
"revision": "b9475fb66a13befda4fa9871cd92343bb2c0eb77",
|
|
49
49
|
},
|
|
50
|
-
description="
|
|
51
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
|
|
50
|
+
description="A sentiment dataset with 3 classes (positive, negative, neutral) for Danish tweets This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
|
|
52
51
|
reference="https://aclanthology.org/2021.nodalida-main.53/",
|
|
53
52
|
type="Classification",
|
|
54
53
|
category="t2c",
|
|
@@ -49,8 +49,7 @@ class DanishPoliticalCommentsClassificationV2(AbsTaskClassification):
|
|
|
49
49
|
"path": "mteb/danish_political_comments",
|
|
50
50
|
"revision": "476a9e7327aba70ad3e97a169d7310b86be9b245",
|
|
51
51
|
},
|
|
52
|
-
description="
|
|
53
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
|
|
52
|
+
description="A dataset of Danish political comments rated for sentiment This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
|
|
54
53
|
reference="https://huggingface.co/datasets/danish_political_comments",
|
|
55
54
|
type="Classification",
|
|
56
55
|
category="t2c",
|
|
@@ -69,8 +69,7 @@ class DdiscoCohesionClassificationV2(AbsTaskClassification):
|
|
|
69
69
|
"path": "mteb/ddisco_cohesion",
|
|
70
70
|
"revision": "b5a05bdecdfc6efc14eebc8f7a86e0986edaf5ff",
|
|
71
71
|
},
|
|
72
|
-
description="
|
|
73
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
|
|
72
|
+
description="A Danish Discourse dataset with values for coherence and source (Wikipedia or Reddit) This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
|
|
74
73
|
reference="https://aclanthology.org/2022.lrec-1.260/",
|
|
75
74
|
type="Classification",
|
|
76
75
|
category="t2c",
|
|
@@ -62,7 +62,7 @@ Piperidis, Stelios},
|
|
|
62
62
|
|
|
63
63
|
def dataset_transform(self):
|
|
64
64
|
# convert label to a 0/1 label
|
|
65
|
-
labels = self.dataset["train"]["label"]
|
|
65
|
+
labels = self.dataset["train"]["label"]
|
|
66
66
|
lab2idx = {lab: idx for idx, lab in enumerate(set(labels))}
|
|
67
67
|
self.dataset = self.dataset.map(
|
|
68
68
|
lambda x: {"label": lab2idx[x["label"]]}, remove_columns=["label"]
|
|
@@ -76,8 +76,7 @@ class DKHateClassificationV2(AbsTaskClassification):
|
|
|
76
76
|
"path": "mteb/dk_hate",
|
|
77
77
|
"revision": "0468ff11393992d8347cf4282fb706fe970608d4",
|
|
78
78
|
},
|
|
79
|
-
description="
|
|
80
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
|
|
79
|
+
description="Danish Tweets annotated for Hate Speech either being Offensive or not This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
|
|
81
80
|
reference="https://aclanthology.org/2020.lrec-1.430/",
|
|
82
81
|
type="Classification",
|
|
83
82
|
category="t2c",
|
|
@@ -56,8 +56,7 @@ Zesch, Torsten},
|
|
|
56
56
|
class GermanPoliticiansTwitterSentimentClassificationV2(AbsTaskClassification):
|
|
57
57
|
metadata = TaskMetadata(
|
|
58
58
|
name="GermanPoliticiansTwitterSentimentClassification.v2",
|
|
59
|
-
description="
|
|
60
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
|
|
59
|
+
description="GermanPoliticiansTwitterSentiment is a dataset of German tweets categorized with their sentiment (3 classes). This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
|
|
61
60
|
reference="https://aclanthology.org/2022.konvens-1.9",
|
|
62
61
|
dataset={
|
|
63
62
|
"path": "mteb/german_politicians_twitter_sentiment",
|
|
@@ -43,8 +43,7 @@ class TenKGnadClassification(AbsTaskClassification):
|
|
|
43
43
|
class TenKGnadClassificationV2(AbsTaskClassification):
|
|
44
44
|
metadata = TaskMetadata(
|
|
45
45
|
name="TenKGnadClassification.v2",
|
|
46
|
-
description="
|
|
47
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
|
|
46
|
+
description="10k German News Articles Dataset (10kGNAD) contains news articles from the online Austrian newspaper website DER Standard with their topic classification (9 classes). This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
|
|
48
47
|
reference="https://tblock.github.io/10kGNAD/",
|
|
49
48
|
dataset={
|
|
50
49
|
"path": "mteb/ten_k_gnad",
|
|
@@ -44,8 +44,7 @@ class AmazonPolarityClassification(AbsTaskClassification):
|
|
|
44
44
|
class AmazonPolarityClassificationV2(AbsTaskClassification):
|
|
45
45
|
metadata = TaskMetadata(
|
|
46
46
|
name="AmazonPolarityClassification.v2",
|
|
47
|
-
description="
|
|
48
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
|
|
47
|
+
description="Amazon Polarity Classification Dataset. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
|
|
49
48
|
reference="https://huggingface.co/datasets/amazon_polarity",
|
|
50
49
|
dataset={
|
|
51
50
|
"path": "mteb/amazon_polarity",
|
|
@@ -43,8 +43,7 @@ class ArxivClassification(AbsTaskClassification):
|
|
|
43
43
|
class ArxivClassificationV2(AbsTaskClassification):
|
|
44
44
|
metadata = TaskMetadata(
|
|
45
45
|
name="ArxivClassification.v2",
|
|
46
|
-
description="
|
|
47
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
|
|
46
|
+
description="Classification Dataset of Arxiv Papers This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
|
|
48
47
|
dataset={
|
|
49
48
|
"path": "mteb/arxiv",
|
|
50
49
|
"revision": "202e10e9a5d37a5068397b48184d0728346a7b4a",
|
|
@@ -61,8 +61,7 @@ Shah, Rushin},
|
|
|
61
61
|
class Banking77ClassificationV2(AbsTaskClassification):
|
|
62
62
|
metadata = TaskMetadata(
|
|
63
63
|
name="Banking77Classification.v2",
|
|
64
|
-
description="
|
|
65
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
|
|
64
|
+
description="Dataset composed of online banking queries annotated with their corresponding intents. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
|
|
66
65
|
reference="https://arxiv.org/abs/2003.04807",
|
|
67
66
|
dataset={
|
|
68
67
|
"path": "mteb/banking77",
|
|
@@ -50,8 +50,7 @@ class DBpediaClassification(AbsTaskClassification):
|
|
|
50
50
|
class DBpediaClassificationV2(AbsTaskClassification):
|
|
51
51
|
metadata = TaskMetadata(
|
|
52
52
|
name="DBpediaClassification.v2",
|
|
53
|
-
description="
|
|
54
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
|
|
53
|
+
description="DBpedia14 is a dataset of English texts from Wikipedia articles, categorized into 14 non-overlapping classes based on their DBpedia ontology. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
|
|
55
54
|
reference="https://arxiv.org/abs/1509.01626",
|
|
56
55
|
dataset={
|
|
57
56
|
"path": "mteb/d_bpedia",
|
|
@@ -59,8 +59,7 @@ Tsujii, Jun{'}ichi},
|
|
|
59
59
|
class EmotionClassificationV2(AbsTaskClassification):
|
|
60
60
|
metadata = TaskMetadata(
|
|
61
61
|
name="EmotionClassification.v2",
|
|
62
|
-
description="
|
|
63
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
|
|
62
|
+
description="Emotion is a dataset of English Twitter messages with six basic emotions: anger, fear, joy, love, sadness, and surprise. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
|
|
64
63
|
reference="https://www.aclweb.org/anthology/D18-1404",
|
|
65
64
|
dataset={
|
|
66
65
|
"path": "mteb/emotion",
|
|
@@ -40,8 +40,7 @@ class FinancialPhrasebankClassification(AbsTaskClassification):
|
|
|
40
40
|
class FinancialPhrasebankClassificationV2(AbsTaskClassification):
|
|
41
41
|
metadata = TaskMetadata(
|
|
42
42
|
name="FinancialPhrasebankClassification.v2",
|
|
43
|
-
description="
|
|
44
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
|
|
43
|
+
description="Polar sentiment dataset of sentences from financial news, categorized by sentiment into positive, negative, or neutral. This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
|
|
45
44
|
reference="https://arxiv.org/abs/1307.5336",
|
|
46
45
|
dataset={
|
|
47
46
|
"path": "mteb/financial_phrasebank",
|
|
@@ -42,8 +42,7 @@ class FrenkEnClassification(AbsTaskClassification):
|
|
|
42
42
|
class FrenkEnClassificationV2(AbsTaskClassification):
|
|
43
43
|
metadata = TaskMetadata(
|
|
44
44
|
name="FrenkEnClassification.v2",
|
|
45
|
-
description="
|
|
46
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
|
|
45
|
+
description="English subset of the FRENK dataset This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
|
|
47
46
|
dataset={
|
|
48
47
|
"path": "mteb/frenk_en",
|
|
49
48
|
"revision": "630d941b6e0879a7238da89af6bfe1b1eb27ca0f",
|
|
@@ -10,7 +10,7 @@ class GTSRBClassification(AbsTaskClassification):
|
|
|
10
10
|
|
|
11
11
|
metadata = TaskMetadata(
|
|
12
12
|
name="GTSRB",
|
|
13
|
-
description="
|
|
13
|
+
description="The German Traffic Sign Recognition Benchmark (GTSRB) is a multi-class classification dataset for traffic signs. It consists of dataset of more than 50,000 traffic sign images. The dataset comprises 43 classes with unbalanced class frequencies.",
|
|
14
14
|
reference="https://benchmark.ini.rub.de/",
|
|
15
15
|
dataset={
|
|
16
16
|
"path": "clip-benchmark/wds_gtsrb",
|
|
@@ -55,8 +55,7 @@ Mihalcea, Rada},
|
|
|
55
55
|
class ImdbClassificationV2(AbsTaskClassification):
|
|
56
56
|
metadata = TaskMetadata(
|
|
57
57
|
name="ImdbClassification.v2",
|
|
58
|
-
description="
|
|
59
|
-
This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
|
|
58
|
+
description="Large Movie Review Dataset This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)",
|
|
60
59
|
dataset={
|
|
61
60
|
"path": "mteb/imdb",
|
|
62
61
|
"revision": "d05f0155defa7991dad75bc68c5ccb6774b1fdc5",
|