mteb 2.7.2__py3-none-any.whl → 2.7.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/_create_dataloaders.py +63 -14
- mteb/_evaluators/any_sts_evaluator.py +12 -5
- mteb/_evaluators/clustering_evaluator.py +12 -4
- mteb/_evaluators/evaluator.py +11 -5
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +14 -5
- mteb/_evaluators/pair_classification_evaluator.py +13 -5
- mteb/_evaluators/retrieval_evaluator.py +22 -13
- mteb/_evaluators/retrieval_metrics.py +9 -3
- mteb/_evaluators/sklearn_evaluator.py +20 -11
- mteb/_evaluators/text/bitext_mining_evaluator.py +10 -3
- mteb/_evaluators/text/summarization_evaluator.py +10 -4
- mteb/_evaluators/zeroshot_classification_evaluator.py +12 -3
- mteb/_helpful_enum.py +5 -1
- mteb/abstasks/_data_filter/filters.py +8 -2
- mteb/abstasks/_data_filter/task_pipelines.py +7 -2
- mteb/abstasks/_statistics_calculation.py +6 -4
- mteb/abstasks/abstask.py +48 -21
- mteb/abstasks/aggregate_task_metadata.py +20 -9
- mteb/abstasks/aggregated_task.py +15 -8
- mteb/abstasks/classification.py +25 -9
- mteb/abstasks/clustering.py +23 -10
- mteb/abstasks/clustering_legacy.py +22 -8
- mteb/abstasks/image/image_text_pair_classification.py +23 -9
- mteb/abstasks/multilabel_classification.py +13 -5
- mteb/abstasks/pair_classification.py +27 -11
- mteb/abstasks/regression.py +14 -6
- mteb/abstasks/retrieval.py +56 -30
- mteb/abstasks/retrieval_dataset_loaders.py +48 -37
- mteb/abstasks/sts.py +29 -13
- mteb/abstasks/task_metadata.py +17 -8
- mteb/abstasks/text/bitext_mining.py +23 -12
- mteb/abstasks/text/reranking.py +2 -2
- mteb/abstasks/text/summarization.py +19 -8
- mteb/abstasks/zeroshot_classification.py +23 -9
- mteb/benchmarks/_create_table.py +13 -7
- mteb/benchmarks/benchmark.py +11 -1
- mteb/benchmarks/benchmarks/__init__.py +2 -0
- mteb/benchmarks/benchmarks/benchmarks.py +41 -2
- mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
- mteb/cache.py +10 -5
- mteb/cli/_display_tasks.py +9 -3
- mteb/cli/build_cli.py +5 -2
- mteb/cli/generate_model_card.py +9 -2
- mteb/deprecated_evaluator.py +16 -12
- mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
- mteb/evaluate.py +33 -20
- mteb/filter_tasks.py +12 -7
- mteb/get_tasks.py +9 -4
- mteb/languages/language_scripts.py +8 -3
- mteb/leaderboard/app.py +11 -4
- mteb/leaderboard/table.py +7 -2
- mteb/load_results.py +9 -3
- mteb/models/abs_encoder.py +22 -12
- mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
- mteb/models/cache_wrappers/cache_wrapper.py +14 -9
- mteb/models/get_model_meta.py +32 -6
- mteb/models/instruct_wrapper.py +13 -5
- mteb/models/model_implementations/align_models.py +10 -4
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +2 -0
- mteb/models/model_implementations/ara_models.py +1 -0
- mteb/models/model_implementations/arctic_models.py +8 -0
- mteb/models/model_implementations/b1ade_models.py +1 -0
- mteb/models/model_implementations/bedrock_models.py +20 -6
- mteb/models/model_implementations/bge_models.py +40 -1
- mteb/models/model_implementations/bica_model.py +1 -0
- mteb/models/model_implementations/blip2_models.py +11 -4
- mteb/models/model_implementations/blip_models.py +17 -4
- mteb/models/model_implementations/bm25.py +24 -14
- mteb/models/model_implementations/bmretriever_models.py +10 -2
- mteb/models/model_implementations/cadet_models.py +1 -0
- mteb/models/model_implementations/cde_models.py +11 -5
- mteb/models/model_implementations/clip_models.py +12 -4
- mteb/models/model_implementations/clips_models.py +3 -0
- mteb/models/model_implementations/codefuse_models.py +5 -0
- mteb/models/model_implementations/codesage_models.py +3 -0
- mteb/models/model_implementations/cohere_models.py +14 -4
- mteb/models/model_implementations/cohere_v.py +14 -4
- mteb/models/model_implementations/colpali_models.py +7 -3
- mteb/models/model_implementations/colqwen_models.py +17 -31
- mteb/models/model_implementations/colsmol_models.py +3 -1
- mteb/models/model_implementations/conan_models.py +11 -4
- mteb/models/model_implementations/dino_models.py +28 -4
- mteb/models/model_implementations/e5_instruct.py +4 -0
- mteb/models/model_implementations/e5_models.py +9 -0
- mteb/models/model_implementations/e5_v.py +10 -4
- mteb/models/model_implementations/eagerworks_models.py +11 -4
- mteb/models/model_implementations/emillykkejensen_models.py +3 -0
- mteb/models/model_implementations/en_code_retriever.py +1 -0
- mteb/models/model_implementations/euler_models.py +1 -0
- mteb/models/model_implementations/evaclip_models.py +13 -4
- mteb/models/model_implementations/fa_models.py +9 -0
- mteb/models/model_implementations/facebookai.py +2 -0
- mteb/models/model_implementations/geogpt_models.py +1 -0
- mteb/models/model_implementations/gme_v_models.py +7 -3
- mteb/models/model_implementations/google_models.py +15 -4
- mteb/models/model_implementations/granite_vision_embedding_models.py +7 -5
- mteb/models/model_implementations/gritlm_models.py +3 -0
- mteb/models/model_implementations/gte_models.py +9 -0
- mteb/models/model_implementations/hinvec_models.py +6 -1
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +6 -0
- mteb/models/model_implementations/inf_models.py +2 -0
- mteb/models/model_implementations/jasper_models.py +14 -5
- mteb/models/model_implementations/jina_clip.py +10 -4
- mteb/models/model_implementations/jina_models.py +17 -5
- mteb/models/model_implementations/kalm_models.py +24 -12
- mteb/models/model_implementations/kblab.py +1 -0
- mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
- mteb/models/model_implementations/kfst.py +1 -0
- mteb/models/model_implementations/kowshik24_models.py +1 -0
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +1 -0
- mteb/models/model_implementations/linq_models.py +7 -1
- mteb/models/model_implementations/listconranker.py +10 -4
- mteb/models/model_implementations/llm2clip_models.py +12 -4
- mteb/models/model_implementations/llm2vec_models.py +20 -6
- mteb/models/model_implementations/mcinext_models.py +8 -2
- mteb/models/model_implementations/mdbr_models.py +2 -0
- mteb/models/model_implementations/misc_models.py +63 -0
- mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
- mteb/models/model_implementations/mme5_models.py +2 -1
- mteb/models/model_implementations/moco_models.py +11 -4
- mteb/models/model_implementations/mod_models.py +2 -1
- mteb/models/model_implementations/model2vec_models.py +23 -4
- mteb/models/model_implementations/moka_models.py +3 -0
- mteb/models/model_implementations/nbailab.py +3 -0
- mteb/models/model_implementations/no_instruct_sentence_models.py +13 -5
- mteb/models/model_implementations/nomic_models.py +17 -4
- mteb/models/model_implementations/nomic_models_vision.py +5 -3
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +9 -3
- mteb/models/model_implementations/nvidia_models.py +15 -4
- mteb/models/model_implementations/octen_models.py +3 -1
- mteb/models/model_implementations/openai_models.py +14 -4
- mteb/models/model_implementations/openclip_models.py +17 -4
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +15 -4
- mteb/models/model_implementations/ops_moa_models.py +9 -2
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
- mteb/models/model_implementations/pawan_models.py +1 -0
- mteb/models/model_implementations/piccolo_models.py +2 -0
- mteb/models/model_implementations/promptriever_models.py +16 -6
- mteb/models/model_implementations/pylate_models.py +32 -13
- mteb/models/model_implementations/qodo_models.py +2 -0
- mteb/models/model_implementations/qtack_models.py +1 -0
- mteb/models/model_implementations/qwen3_models.py +11 -1
- mteb/models/model_implementations/qzhou_models.py +2 -0
- mteb/models/model_implementations/random_baseline.py +4 -3
- mteb/models/model_implementations/rasgaard_models.py +1 -0
- mteb/models/model_implementations/reasonir_model.py +65 -0
- mteb/models/model_implementations/repllama_models.py +15 -6
- mteb/models/model_implementations/rerankers_custom.py +13 -4
- mteb/models/model_implementations/rerankers_monot5_based.py +24 -4
- mteb/models/model_implementations/richinfoai_models.py +1 -0
- mteb/models/model_implementations/ru_sentence_models.py +20 -0
- mteb/models/model_implementations/ruri_models.py +10 -0
- mteb/models/model_implementations/salesforce_models.py +10 -1
- mteb/models/model_implementations/samilpwc_models.py +1 -0
- mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
- mteb/models/model_implementations/searchmap_models.py +1 -0
- mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +6 -2
- mteb/models/model_implementations/seed_models.py +2 -1
- mteb/models/model_implementations/sentence_transformers_models.py +18 -0
- mteb/models/model_implementations/shuu_model.py +1 -0
- mteb/models/model_implementations/siglip_models.py +19 -4
- mteb/models/model_implementations/slm_models.py +7 -4
- mteb/models/model_implementations/sonar_models.py +2 -1
- mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
- mteb/models/model_implementations/stella_models.py +6 -0
- mteb/models/model_implementations/tarka_models.py +2 -0
- mteb/models/model_implementations/text2vec_models.py +3 -0
- mteb/models/model_implementations/ua_sentence_models.py +1 -0
- mteb/models/model_implementations/uae_models.py +10 -4
- mteb/models/model_implementations/vdr_models.py +8 -1
- mteb/models/model_implementations/vi_vn_models.py +6 -0
- mteb/models/model_implementations/vista_models.py +11 -4
- mteb/models/model_implementations/vlm2vec_models.py +11 -4
- mteb/models/model_implementations/voyage_models.py +52 -4
- mteb/models/model_implementations/voyage_v.py +11 -6
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +1 -0
- mteb/models/model_implementations/yuan_models.py +1 -0
- mteb/models/model_implementations/yuan_models_en.py +2 -1
- mteb/models/model_meta.py +47 -9
- mteb/models/models_protocols.py +23 -18
- mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
- mteb/models/search_wrappers.py +31 -12
- mteb/models/sentence_transformer_wrapper.py +4 -3
- mteb/models/vllm_wrapper.py +8 -6
- mteb/results/benchmark_results.py +22 -17
- mteb/results/model_result.py +21 -15
- mteb/results/task_result.py +32 -16
- mteb/similarity_functions.py +8 -2
- mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
- mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
- mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
- mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
- mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
- mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
- mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
- mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
- mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
- mteb/tasks/classification/est/estonian_valence.py +1 -1
- mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
- mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
- mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
- mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
- mteb/tasks/classification/fra/french_book_reviews.py +2 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
- mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
- mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
- mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
- mteb/tasks/classification/jpn/wrime_classification.py +1 -1
- mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
- mteb/tasks/classification/kor/klue_tc.py +2 -2
- mteb/tasks/classification/kor/kor_fin.py +1 -1
- mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
- mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
- mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
- mteb/tasks/classification/multilingual/scala_classification.py +1 -1
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
- mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
- mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
- mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
- mteb/tasks/classification/ory/odia_news_classification.py +2 -2
- mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
- mteb/tasks/classification/ron/moroco.py +1 -1
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
- mteb/tasks/classification/rus/georeview_classification.py +1 -1
- mteb/tasks/classification/rus/headline_classification.py +2 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
- mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
- mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
- mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
- mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
- mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
- mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
- mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
- mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
- mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
- mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
- mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nob/snl_clustering.py +8 -3
- mteb/tasks/clustering/nob/vg_clustering.py +8 -3
- mteb/tasks/clustering/pol/polish_clustering.py +3 -3
- mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
- mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
- mteb/tasks/clustering/zho/cmteb_clustering.py +4 -4
- mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
- mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
- mteb/tasks/multichoice/eng/cv_bench.py +4 -4
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
- mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
- mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
- mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
- mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
- mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
- mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
- mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
- mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
- mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
- mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
- mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
- mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
- mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
- mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
- mteb/tasks/pair_classification/rus/terra.py +2 -2
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
- mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
- mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_rag.py +4 -4
- mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
- mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
- mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
- mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
- mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
- mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +1 -1
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +1 -1
- mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
- mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
- mteb/tasks/retrieval/eng/__init__.py +42 -0
- mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
- mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
- mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/ml_questions.py +1 -1
- mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
- mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
- mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
- mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
- mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
- mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
- mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nob/norquad.py +1 -1
- mteb/tasks/retrieval/nob/snl_retrieval.py +1 -1
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
- mteb/tasks/sts/fao/faroese_sts.py +1 -1
- mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
- mteb/tasks/sts/kor/klue_sts.py +1 -1
- mteb/tasks/sts/por/sick_br_sts.py +1 -1
- mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
- mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
- mteb/types/_encoder_io.py +1 -1
- mteb/types/statistics.py +9 -2
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/METADATA +1 -1
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/RECORD +486 -465
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
mteb/results/task_result.py
CHANGED
|
@@ -4,34 +4,40 @@ import json
|
|
|
4
4
|
import logging
|
|
5
5
|
import warnings
|
|
6
6
|
from collections import defaultdict
|
|
7
|
-
from collections.abc import Callable, Iterable, Mapping
|
|
8
7
|
from functools import cached_property
|
|
9
8
|
from importlib.metadata import version
|
|
10
|
-
from
|
|
11
|
-
from typing import Any
|
|
9
|
+
from typing import TYPE_CHECKING, Any
|
|
12
10
|
|
|
13
11
|
import numpy as np
|
|
14
12
|
from huggingface_hub import EvalResult
|
|
15
13
|
from packaging.version import Version
|
|
16
14
|
from pydantic import BaseModel, field_validator
|
|
17
|
-
from typing_extensions import Self
|
|
18
15
|
|
|
19
16
|
from mteb import TaskMetadata
|
|
20
17
|
from mteb._helpful_enum import HelpfulStrEnum
|
|
21
18
|
from mteb.abstasks import AbsTaskClassification
|
|
22
19
|
from mteb.abstasks.abstask import AbsTask
|
|
23
|
-
from mteb.abstasks.task_metadata import TaskDomain
|
|
24
20
|
from mteb.languages import LanguageScripts
|
|
25
21
|
from mteb.models.model_meta import ScoringFunction
|
|
26
22
|
from mteb.types import (
|
|
27
|
-
HFSubset,
|
|
28
|
-
ISOLanguage,
|
|
29
|
-
ISOLanguageScript,
|
|
30
|
-
Score,
|
|
31
23
|
ScoresDict,
|
|
32
24
|
SplitName,
|
|
33
25
|
)
|
|
34
26
|
|
|
27
|
+
if TYPE_CHECKING:
|
|
28
|
+
from collections.abc import Callable, Iterable, Mapping
|
|
29
|
+
from pathlib import Path
|
|
30
|
+
|
|
31
|
+
from typing_extensions import Self
|
|
32
|
+
|
|
33
|
+
from mteb.abstasks.task_metadata import TaskDomain
|
|
34
|
+
from mteb.types import (
|
|
35
|
+
HFSubset,
|
|
36
|
+
ISOLanguage,
|
|
37
|
+
ISOLanguageScript,
|
|
38
|
+
Score,
|
|
39
|
+
)
|
|
40
|
+
|
|
35
41
|
logger = logging.getLogger(__name__)
|
|
36
42
|
|
|
37
43
|
|
|
@@ -641,16 +647,26 @@ class TaskResult(BaseModel):
|
|
|
641
647
|
if split not in splits:
|
|
642
648
|
continue
|
|
643
649
|
seen_subsets = set()
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
+
if task.is_aggregate:
|
|
651
|
+
# aggregate tasks only have the default subset, but in metadata can be multiple
|
|
652
|
+
new_scores[split] = [
|
|
653
|
+
_scores
|
|
654
|
+
for _scores in self.scores[split]
|
|
655
|
+
if _scores["hf_subset"] == "default"
|
|
656
|
+
]
|
|
657
|
+
seen_subsets = {"default"}
|
|
658
|
+
else:
|
|
659
|
+
new_scores[split] = [
|
|
660
|
+
_scores
|
|
661
|
+
for _scores in self.scores[split]
|
|
662
|
+
if _scores["hf_subset"] in hf_subsets
|
|
663
|
+
]
|
|
650
664
|
for _scores in new_scores[split]:
|
|
651
665
|
seen_subsets.add(_scores["hf_subset"])
|
|
652
666
|
|
|
653
|
-
if seen_subsets != hf_subsets
|
|
667
|
+
if seen_subsets != hf_subsets and not (
|
|
668
|
+
task.is_aggregate and "default" in seen_subsets
|
|
669
|
+
):
|
|
654
670
|
missing_subsets = hf_subsets - seen_subsets
|
|
655
671
|
if len(missing_subsets) > 2:
|
|
656
672
|
subset1, subset2 = list(missing_subsets)[:2]
|
mteb/similarity_functions.py
CHANGED
|
@@ -1,8 +1,14 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
1
5
|
import torch
|
|
2
6
|
|
|
3
|
-
from mteb.models import EncoderProtocol
|
|
4
7
|
from mteb.models.model_meta import ScoringFunction
|
|
5
|
-
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from mteb.models import EncoderProtocol
|
|
11
|
+
from mteb.types import Array
|
|
6
12
|
|
|
7
13
|
|
|
8
14
|
def _use_torch_compile():
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from mteb.abstasks import
|
|
2
|
-
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
1
|
+
from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
|
|
2
|
+
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
3
3
|
from mteb.tasks.retrieval import (
|
|
4
4
|
CQADupstackAndroidRetrieval,
|
|
5
5
|
CQADupstackEnglishRetrieval,
|
|
@@ -15,7 +15,7 @@ from mteb.tasks.retrieval import (
|
|
|
15
15
|
CQADupstackWordpressRetrieval,
|
|
16
16
|
)
|
|
17
17
|
|
|
18
|
-
task_list_cqa
|
|
18
|
+
task_list_cqa = [
|
|
19
19
|
CQADupstackAndroidRetrieval(),
|
|
20
20
|
CQADupstackEnglishRetrieval(),
|
|
21
21
|
CQADupstackGamingRetrieval(),
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
from mteb.abstasks.
|
|
2
|
-
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
1
|
+
from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
|
|
2
|
+
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
3
3
|
from mteb.tasks.sts.multilingual.sts17_multilingual_visual_sts import (
|
|
4
4
|
STS17MultilingualVisualSTS,
|
|
5
5
|
)
|
|
6
6
|
|
|
7
|
-
task_list_sts17
|
|
7
|
+
task_list_sts17 = [
|
|
8
8
|
STS17MultilingualVisualSTS().filter_languages(
|
|
9
9
|
languages=["eng"], hf_subsets=["en-en"]
|
|
10
10
|
)
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
from mteb.abstasks.
|
|
2
|
-
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
1
|
+
from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
|
|
2
|
+
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
3
3
|
from mteb.tasks.sts.multilingual.sts_benchmark_multilingual_visual_sts import (
|
|
4
4
|
STSBenchmarkMultilingualVisualSTS,
|
|
5
5
|
)
|
|
6
6
|
|
|
7
|
-
task_list_stsb
|
|
7
|
+
task_list_stsb = [
|
|
8
8
|
STSBenchmarkMultilingualVisualSTS().filter_languages(
|
|
9
9
|
languages=["eng"], hf_subsets=["en"]
|
|
10
10
|
)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from mteb.abstasks import
|
|
2
|
-
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
1
|
+
from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
|
|
2
|
+
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
3
3
|
from mteb.tasks.retrieval import (
|
|
4
4
|
CQADupstackAndroidRetrievalFa,
|
|
5
5
|
CQADupstackEnglishRetrievalFa,
|
|
@@ -15,7 +15,7 @@ from mteb.tasks.retrieval import (
|
|
|
15
15
|
CQADupstackWordpressRetrievalFa,
|
|
16
16
|
)
|
|
17
17
|
|
|
18
|
-
task_list_cqa
|
|
18
|
+
task_list_cqa = [
|
|
19
19
|
CQADupstackAndroidRetrievalFa(),
|
|
20
20
|
CQADupstackEnglishRetrievalFa(),
|
|
21
21
|
CQADupstackGamingRetrievalFa(),
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from mteb.abstasks import
|
|
2
|
-
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
1
|
+
from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
|
|
2
|
+
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
3
3
|
from mteb.tasks.classification import (
|
|
4
4
|
SynPerChatbotConvSAAnger,
|
|
5
5
|
SynPerChatbotConvSAFear,
|
|
@@ -12,7 +12,7 @@ from mteb.tasks.classification import (
|
|
|
12
12
|
SynPerChatbotConvSASurprise,
|
|
13
13
|
)
|
|
14
14
|
|
|
15
|
-
task_list_cqa
|
|
15
|
+
task_list_cqa = [
|
|
16
16
|
SynPerChatbotConvSAAnger(),
|
|
17
17
|
SynPerChatbotConvSASatisfaction(),
|
|
18
18
|
SynPerChatbotConvSAFriendship(),
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
from mteb.abstasks.
|
|
2
|
-
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
1
|
+
from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
|
|
2
|
+
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
3
3
|
from mteb.tasks.sts.multilingual.sts17_multilingual_visual_sts import (
|
|
4
4
|
STS17MultilingualVisualSTS,
|
|
5
5
|
)
|
|
6
6
|
|
|
7
|
-
task_list_sts17_multi
|
|
7
|
+
task_list_sts17_multi = [
|
|
8
8
|
STS17MultilingualVisualSTS().filter_languages(
|
|
9
9
|
languages=["ara", "eng", "spa", "kor"],
|
|
10
10
|
hf_subsets=[
|
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
from mteb.abstasks.
|
|
2
|
-
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
1
|
+
from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
|
|
2
|
+
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
3
3
|
from mteb.tasks.sts.multilingual.sts_benchmark_multilingual_visual_sts import (
|
|
4
4
|
STSBenchmarkMultilingualVisualSTS,
|
|
5
5
|
)
|
|
6
6
|
|
|
7
|
-
task_list_multi
|
|
7
|
+
task_list_multi = [
|
|
8
8
|
STSBenchmarkMultilingualVisualSTS().filter_languages(
|
|
9
9
|
languages=[
|
|
10
10
|
"deu",
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from mteb.abstasks import
|
|
2
|
-
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
1
|
+
from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
|
|
2
|
+
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
3
3
|
from mteb.tasks.retrieval import (
|
|
4
4
|
CQADupstackAndroidNLRetrieval,
|
|
5
5
|
CQADupstackEnglishNLRetrieval,
|
|
@@ -15,7 +15,7 @@ from mteb.tasks.retrieval import (
|
|
|
15
15
|
CQADupstackWordpressNLRetrieval,
|
|
16
16
|
)
|
|
17
17
|
|
|
18
|
-
task_list_cqa
|
|
18
|
+
task_list_cqa = [
|
|
19
19
|
CQADupstackAndroidNLRetrieval(),
|
|
20
20
|
CQADupstackEnglishNLRetrieval(),
|
|
21
21
|
CQADupstackGamingNLRetrieval(),
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from mteb.abstasks import
|
|
2
|
-
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
1
|
+
from mteb.abstasks.aggregate_task_metadata import AggregateTaskMetadata
|
|
2
|
+
from mteb.abstasks.aggregated_task import AbsTaskAggregate
|
|
3
3
|
from mteb.tasks.retrieval.pol.cqadupstack_pl_retrieval import (
|
|
4
4
|
CQADupstackAndroidRetrievalPL,
|
|
5
5
|
CQADupstackEnglishRetrievalPL,
|
|
@@ -15,7 +15,7 @@ from mteb.tasks.retrieval.pol.cqadupstack_pl_retrieval import (
|
|
|
15
15
|
CQADupstackWordpressRetrievalPL,
|
|
16
16
|
)
|
|
17
17
|
|
|
18
|
-
task_list_cqa
|
|
18
|
+
task_list_cqa = [
|
|
19
19
|
CQADupstackAndroidRetrievalPL(),
|
|
20
20
|
CQADupstackEnglishRetrievalPL(),
|
|
21
21
|
CQADupstackGamingRetrievalPL(),
|
|
@@ -59,7 +59,7 @@ class PubChemSMILESBitextMining(AbsTaskBitextMining):
|
|
|
59
59
|
""",
|
|
60
60
|
)
|
|
61
61
|
|
|
62
|
-
def dataset_transform(self):
|
|
62
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
63
63
|
for subset in self.hf_subsets:
|
|
64
64
|
self.dataset[subset] = self.dataset[subset].rename_columns(
|
|
65
65
|
COL_MAPPING[subset]
|
|
@@ -27,7 +27,7 @@ class SAMSumFa(AbsTaskBitextMining):
|
|
|
27
27
|
bibtex_citation="",
|
|
28
28
|
)
|
|
29
29
|
|
|
30
|
-
def dataset_transform(self):
|
|
30
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
31
31
|
self.dataset = self.dataset.rename_columns(
|
|
32
32
|
{"text": "sentence1", "summary": "sentence2"}
|
|
33
33
|
)
|
|
@@ -58,7 +58,7 @@ class SynPerChatbotSumSRetrieval(AbsTaskBitextMining):
|
|
|
58
58
|
bibtex_citation=""" """,
|
|
59
59
|
)
|
|
60
60
|
|
|
61
|
-
def dataset_transform(self):
|
|
61
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
62
62
|
self.dataset = self.dataset.rename_columns(
|
|
63
63
|
{"text": "sentence1", "summary": "sentence2"}
|
|
64
64
|
)
|
|
@@ -89,7 +89,7 @@ class SynPerChatbotRAGSumSRetrieval(AbsTaskBitextMining):
|
|
|
89
89
|
bibtex_citation=""" """,
|
|
90
90
|
)
|
|
91
91
|
|
|
92
|
-
def dataset_transform(self):
|
|
92
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
93
93
|
self.dataset = self.dataset.rename_columns(
|
|
94
94
|
{"text": "sentence1", "summary": "sentence2"}
|
|
95
95
|
)
|
|
@@ -35,7 +35,7 @@ class NorwegianCourtsBitextMining(AbsTaskBitextMining):
|
|
|
35
35
|
prompt="Retrieve parallel sentences in Norwegian Bokmål and Nynorsk",
|
|
36
36
|
)
|
|
37
37
|
|
|
38
|
-
def dataset_transform(self):
|
|
38
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
39
39
|
# Convert to standard format
|
|
40
40
|
self.dataset = self.dataset.rename_column("nb", "sentence1")
|
|
41
41
|
self.dataset = self.dataset.rename_column("nn", "sentence2")
|
|
@@ -32,7 +32,7 @@ class RomaTalesBitextMining(AbsTaskBitextMining):
|
|
|
32
32
|
bibtex_citation="",
|
|
33
33
|
)
|
|
34
34
|
|
|
35
|
-
def load_data(self) -> None:
|
|
35
|
+
def load_data(self, num_proc: int = 1, **kwargs) -> None:
|
|
36
36
|
"""Load dataset from HuggingFace hub and convert it to the standard format."""
|
|
37
37
|
if self.data_loaded:
|
|
38
38
|
return
|
|
@@ -44,7 +44,7 @@ class RomaTalesBitextMining(AbsTaskBitextMining):
|
|
|
44
44
|
self.dataset_transform()
|
|
45
45
|
self.data_loaded = True
|
|
46
46
|
|
|
47
|
-
def dataset_transform(self):
|
|
47
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
48
48
|
for lang in self.hf_subsets:
|
|
49
49
|
self.dataset[lang] = self.dataset[lang].rename_columns(
|
|
50
50
|
{"romani": "sentence1", "hungarian": "sentence2"}
|
|
@@ -230,7 +230,7 @@ class WebFAQBitextMiningQuestions(AbsTaskBitextMining):
|
|
|
230
230
|
""",
|
|
231
231
|
)
|
|
232
232
|
|
|
233
|
-
def dataset_transform(self):
|
|
233
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
234
234
|
dataset = {}
|
|
235
235
|
for langs in self.dataset:
|
|
236
236
|
dataset[langs] = {}
|
|
@@ -284,7 +284,7 @@ class WebFAQBitextMiningQAs(AbsTaskBitextMining):
|
|
|
284
284
|
""",
|
|
285
285
|
)
|
|
286
286
|
|
|
287
|
-
def dataset_transform(self):
|
|
287
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
288
288
|
dataset = {}
|
|
289
289
|
for langs in self.dataset:
|
|
290
290
|
dataset[langs] = {}
|
|
@@ -28,7 +28,7 @@ class OnlineStoreReviewSentimentClassification(AbsTaskClassification):
|
|
|
28
28
|
superseded_by="OnlineStoreReviewSentimentClassification.v2",
|
|
29
29
|
)
|
|
30
30
|
|
|
31
|
-
def dataset_transform(self):
|
|
31
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
32
32
|
self.dataset = self.stratified_subsampling(
|
|
33
33
|
self.dataset, seed=self.seed, splits=["train"]
|
|
34
34
|
)
|
|
@@ -37,7 +37,7 @@ class RestaurantReviewSentimentClassification(AbsTaskClassification):
|
|
|
37
37
|
superseded_by="RestaurantReviewSentimentClassification.v2",
|
|
38
38
|
)
|
|
39
39
|
|
|
40
|
-
def dataset_transform(self):
|
|
40
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
41
41
|
# labels: 0 negative, 1 positive
|
|
42
42
|
self.dataset = self.dataset.rename_column("polarity", "label")
|
|
43
43
|
self.dataset = self.stratified_subsampling(
|
|
@@ -48,7 +48,7 @@ Mubarak, Hamdy},
|
|
|
48
48
|
superseded_by="TweetSarcasmClassification.v2",
|
|
49
49
|
)
|
|
50
50
|
|
|
51
|
-
def dataset_transform(self):
|
|
51
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
52
52
|
# labels: 0 non-sarcastic, 1 sarcastic
|
|
53
53
|
self.dataset = self.dataset.rename_columns(
|
|
54
54
|
{"tweet": "text", "sarcasm": "label"}
|
|
@@ -36,7 +36,7 @@ class BengaliHateSpeechClassification(AbsTaskClassification):
|
|
|
36
36
|
superseded_by="BengaliHateSpeechClassification.v2",
|
|
37
37
|
)
|
|
38
38
|
|
|
39
|
-
def dataset_transform(self):
|
|
39
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
40
40
|
self.dataset = self.stratified_subsampling(
|
|
41
41
|
self.dataset, seed=self.seed, splits=["train"]
|
|
42
42
|
)
|
|
@@ -36,7 +36,7 @@ class BengaliSentimentAnalysis(AbsTaskClassification):
|
|
|
36
36
|
superseded_by="BengaliSentimentAnalysis.v2",
|
|
37
37
|
)
|
|
38
38
|
|
|
39
|
-
def dataset_transform(self):
|
|
39
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
40
40
|
self.dataset = self.stratified_subsampling(
|
|
41
41
|
self.dataset, seed=self.seed, splits=["train"]
|
|
42
42
|
)
|
|
@@ -37,7 +37,7 @@ class BulgarianStoreReviewSentimentClassfication(AbsTaskClassification):
|
|
|
37
37
|
""",
|
|
38
38
|
)
|
|
39
39
|
|
|
40
|
-
def dataset_transform(self):
|
|
40
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
41
41
|
self.dataset = self.dataset.rename_columns(
|
|
42
42
|
{"Review": "text", "Category": "label"}
|
|
43
43
|
)
|
|
@@ -39,7 +39,7 @@ class CSFDCZMovieReviewSentimentClassification(AbsTaskClassification):
|
|
|
39
39
|
# Increase the samples_per_label in order to improve baseline performance
|
|
40
40
|
samples_per_label = 20
|
|
41
41
|
|
|
42
|
-
def dataset_transform(self):
|
|
42
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
43
43
|
self.dataset = self.dataset.rename_columns(
|
|
44
44
|
{"comment": "text", "rating_int": "label"}
|
|
45
45
|
)
|
|
@@ -85,7 +85,7 @@ class CSFDCZMovieReviewSentimentClassificationV2(AbsTaskClassification):
|
|
|
85
85
|
# Increase the samples_per_label in order to improve baseline performance
|
|
86
86
|
samples_per_label = 20
|
|
87
87
|
|
|
88
|
-
def dataset_transform(self):
|
|
88
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
89
89
|
self.dataset = self.stratified_subsampling(
|
|
90
90
|
self.dataset, seed=self.seed, splits=["test"], n_samples=2048
|
|
91
91
|
)
|
|
@@ -60,7 +60,7 @@ Piperidis, Stelios},
|
|
|
60
60
|
|
|
61
61
|
samples_per_label = 16
|
|
62
62
|
|
|
63
|
-
def dataset_transform(self):
|
|
63
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
64
64
|
# convert label to a 0/1 label
|
|
65
65
|
labels = self.dataset["train"]["label"]
|
|
66
66
|
lab2idx = {lab: idx for idx, lab in enumerate(set(labels))}
|
|
@@ -49,7 +49,7 @@ Zesch, Torsten},
|
|
|
49
49
|
superseded_by="GermanPoliticiansTwitterSentimentClassification.v2",
|
|
50
50
|
)
|
|
51
51
|
|
|
52
|
-
def dataset_transform(self):
|
|
52
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
53
53
|
self.dataset = self.dataset.rename_column("majority_sentiment", "label")
|
|
54
54
|
|
|
55
55
|
|
|
@@ -40,7 +40,7 @@ class DBpediaClassification(AbsTaskClassification):
|
|
|
40
40
|
superseded_by="DBpediaClassification.v2",
|
|
41
41
|
)
|
|
42
42
|
|
|
43
|
-
def dataset_transform(self):
|
|
43
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
44
44
|
self.dataset = self.dataset.rename_column("content", "text")
|
|
45
45
|
self.dataset = self.stratified_subsampling(
|
|
46
46
|
self.dataset, seed=self.seed, splits=["train", "test"]
|
|
@@ -85,7 +85,7 @@ class DBpediaClassificationV2(AbsTaskClassification):
|
|
|
85
85
|
adapted_from=["DBpediaClassification"],
|
|
86
86
|
)
|
|
87
87
|
|
|
88
|
-
def dataset_transform(self):
|
|
88
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
89
89
|
self.dataset = self.stratified_subsampling(
|
|
90
90
|
self.dataset, seed=self.seed, splits=["train", "test"]
|
|
91
91
|
)
|
|
@@ -40,7 +40,7 @@ class ToxicChatClassification(AbsTaskClassification):
|
|
|
40
40
|
superseded_by="ToxicChatClassification.v2",
|
|
41
41
|
)
|
|
42
42
|
|
|
43
|
-
def dataset_transform(self):
|
|
43
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
44
44
|
keep_cols = ["user_input", "toxicity"]
|
|
45
45
|
rename_dict = dict(zip(keep_cols, ["text", "label"]))
|
|
46
46
|
remove_cols = [
|
|
@@ -93,7 +93,7 @@ class ToxicChatClassificationV2(AbsTaskClassification):
|
|
|
93
93
|
adapted_from=["ToxicChatClassification"],
|
|
94
94
|
)
|
|
95
95
|
|
|
96
|
-
def dataset_transform(self):
|
|
96
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
97
97
|
self.dataset = self.stratified_subsampling(
|
|
98
98
|
self.dataset, seed=self.seed, splits=["test"]
|
|
99
99
|
)
|
|
@@ -42,7 +42,7 @@ class ToxicConversationsClassification(AbsTaskClassification):
|
|
|
42
42
|
|
|
43
43
|
samples_per_label = 16
|
|
44
44
|
|
|
45
|
-
def dataset_transform(self):
|
|
45
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
46
46
|
self.dataset = self.stratified_subsampling(
|
|
47
47
|
self.dataset, seed=self.seed, splits=["test"]
|
|
48
48
|
)
|
|
@@ -88,7 +88,7 @@ class ToxicConversationsClassificationV2(AbsTaskClassification):
|
|
|
88
88
|
|
|
89
89
|
samples_per_label = 16
|
|
90
90
|
|
|
91
|
-
def dataset_transform(self):
|
|
91
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
92
92
|
self.dataset = self.stratified_subsampling(
|
|
93
93
|
self.dataset, seed=self.seed, splits=["test"]
|
|
94
94
|
)
|
|
@@ -83,7 +83,7 @@ class YahooAnswersTopicsClassificationV2(AbsTaskClassification):
|
|
|
83
83
|
|
|
84
84
|
samples_per_label = 32
|
|
85
85
|
|
|
86
|
-
def dataset_transform(self):
|
|
86
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
87
87
|
self.dataset = self.stratified_subsampling(
|
|
88
88
|
self.dataset, seed=self.seed, splits=["train", "test"]
|
|
89
89
|
)
|
|
@@ -42,7 +42,7 @@ class YelpReviewFullClassification(AbsTaskClassification):
|
|
|
42
42
|
|
|
43
43
|
samples_per_label = 128
|
|
44
44
|
|
|
45
|
-
def dataset_transform(self):
|
|
45
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
46
46
|
self.dataset = self.stratified_subsampling(
|
|
47
47
|
self.dataset, seed=self.seed, splits=["test"]
|
|
48
48
|
)
|
|
@@ -88,7 +88,7 @@ class YelpReviewFullClassificationV2(AbsTaskClassification):
|
|
|
88
88
|
|
|
89
89
|
samples_per_label = 128
|
|
90
90
|
|
|
91
|
-
def dataset_transform(self):
|
|
91
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
92
92
|
self.dataset = self.stratified_subsampling(
|
|
93
93
|
self.dataset, seed=self.seed, splits=["test"]
|
|
94
94
|
)
|
|
@@ -40,7 +40,7 @@ class EstonianValenceClassification(AbsTaskClassification):
|
|
|
40
40
|
superseded_by="EstonianValenceClassification.v2",
|
|
41
41
|
)
|
|
42
42
|
|
|
43
|
-
def dataset_transform(self):
|
|
43
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
44
44
|
self.dataset = self.dataset.rename_column("paragraph", "text").rename_column(
|
|
45
45
|
"valence", "label"
|
|
46
46
|
)
|