mteb 2.7.2__py3-none-any.whl → 2.7.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/_create_dataloaders.py +63 -14
- mteb/_evaluators/any_sts_evaluator.py +12 -5
- mteb/_evaluators/clustering_evaluator.py +12 -4
- mteb/_evaluators/evaluator.py +11 -5
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +14 -5
- mteb/_evaluators/pair_classification_evaluator.py +13 -5
- mteb/_evaluators/retrieval_evaluator.py +22 -13
- mteb/_evaluators/retrieval_metrics.py +9 -3
- mteb/_evaluators/sklearn_evaluator.py +20 -11
- mteb/_evaluators/text/bitext_mining_evaluator.py +10 -3
- mteb/_evaluators/text/summarization_evaluator.py +10 -4
- mteb/_evaluators/zeroshot_classification_evaluator.py +12 -3
- mteb/_helpful_enum.py +5 -1
- mteb/abstasks/_data_filter/filters.py +8 -2
- mteb/abstasks/_data_filter/task_pipelines.py +7 -2
- mteb/abstasks/_statistics_calculation.py +6 -4
- mteb/abstasks/abstask.py +48 -21
- mteb/abstasks/aggregate_task_metadata.py +20 -9
- mteb/abstasks/aggregated_task.py +15 -8
- mteb/abstasks/classification.py +25 -9
- mteb/abstasks/clustering.py +23 -10
- mteb/abstasks/clustering_legacy.py +22 -8
- mteb/abstasks/image/image_text_pair_classification.py +23 -9
- mteb/abstasks/multilabel_classification.py +13 -5
- mteb/abstasks/pair_classification.py +27 -11
- mteb/abstasks/regression.py +14 -6
- mteb/abstasks/retrieval.py +56 -30
- mteb/abstasks/retrieval_dataset_loaders.py +48 -37
- mteb/abstasks/sts.py +29 -13
- mteb/abstasks/task_metadata.py +17 -8
- mteb/abstasks/text/bitext_mining.py +23 -12
- mteb/abstasks/text/reranking.py +2 -2
- mteb/abstasks/text/summarization.py +19 -8
- mteb/abstasks/zeroshot_classification.py +23 -9
- mteb/benchmarks/_create_table.py +13 -7
- mteb/benchmarks/benchmark.py +11 -1
- mteb/benchmarks/benchmarks/__init__.py +2 -0
- mteb/benchmarks/benchmarks/benchmarks.py +41 -2
- mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
- mteb/cache.py +10 -5
- mteb/cli/_display_tasks.py +9 -3
- mteb/cli/build_cli.py +5 -2
- mteb/cli/generate_model_card.py +9 -2
- mteb/deprecated_evaluator.py +16 -12
- mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
- mteb/evaluate.py +33 -20
- mteb/filter_tasks.py +12 -7
- mteb/get_tasks.py +9 -4
- mteb/languages/language_scripts.py +8 -3
- mteb/leaderboard/app.py +11 -4
- mteb/leaderboard/table.py +7 -2
- mteb/load_results.py +9 -3
- mteb/models/abs_encoder.py +22 -12
- mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
- mteb/models/cache_wrappers/cache_wrapper.py +14 -9
- mteb/models/get_model_meta.py +32 -6
- mteb/models/instruct_wrapper.py +13 -5
- mteb/models/model_implementations/align_models.py +10 -4
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +2 -0
- mteb/models/model_implementations/ara_models.py +1 -0
- mteb/models/model_implementations/arctic_models.py +8 -0
- mteb/models/model_implementations/b1ade_models.py +1 -0
- mteb/models/model_implementations/bedrock_models.py +20 -6
- mteb/models/model_implementations/bge_models.py +40 -1
- mteb/models/model_implementations/bica_model.py +1 -0
- mteb/models/model_implementations/blip2_models.py +11 -4
- mteb/models/model_implementations/blip_models.py +17 -4
- mteb/models/model_implementations/bm25.py +24 -14
- mteb/models/model_implementations/bmretriever_models.py +10 -2
- mteb/models/model_implementations/cadet_models.py +1 -0
- mteb/models/model_implementations/cde_models.py +11 -5
- mteb/models/model_implementations/clip_models.py +12 -4
- mteb/models/model_implementations/clips_models.py +3 -0
- mteb/models/model_implementations/codefuse_models.py +5 -0
- mteb/models/model_implementations/codesage_models.py +3 -0
- mteb/models/model_implementations/cohere_models.py +14 -4
- mteb/models/model_implementations/cohere_v.py +14 -4
- mteb/models/model_implementations/colpali_models.py +7 -3
- mteb/models/model_implementations/colqwen_models.py +17 -31
- mteb/models/model_implementations/colsmol_models.py +3 -1
- mteb/models/model_implementations/conan_models.py +11 -4
- mteb/models/model_implementations/dino_models.py +28 -4
- mteb/models/model_implementations/e5_instruct.py +4 -0
- mteb/models/model_implementations/e5_models.py +9 -0
- mteb/models/model_implementations/e5_v.py +10 -4
- mteb/models/model_implementations/eagerworks_models.py +11 -4
- mteb/models/model_implementations/emillykkejensen_models.py +3 -0
- mteb/models/model_implementations/en_code_retriever.py +1 -0
- mteb/models/model_implementations/euler_models.py +1 -0
- mteb/models/model_implementations/evaclip_models.py +13 -4
- mteb/models/model_implementations/fa_models.py +9 -0
- mteb/models/model_implementations/facebookai.py +2 -0
- mteb/models/model_implementations/geogpt_models.py +1 -0
- mteb/models/model_implementations/gme_v_models.py +7 -3
- mteb/models/model_implementations/google_models.py +15 -4
- mteb/models/model_implementations/granite_vision_embedding_models.py +7 -5
- mteb/models/model_implementations/gritlm_models.py +3 -0
- mteb/models/model_implementations/gte_models.py +9 -0
- mteb/models/model_implementations/hinvec_models.py +6 -1
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +6 -0
- mteb/models/model_implementations/inf_models.py +2 -0
- mteb/models/model_implementations/jasper_models.py +14 -5
- mteb/models/model_implementations/jina_clip.py +10 -4
- mteb/models/model_implementations/jina_models.py +17 -5
- mteb/models/model_implementations/kalm_models.py +24 -12
- mteb/models/model_implementations/kblab.py +1 -0
- mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
- mteb/models/model_implementations/kfst.py +1 -0
- mteb/models/model_implementations/kowshik24_models.py +1 -0
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +1 -0
- mteb/models/model_implementations/linq_models.py +7 -1
- mteb/models/model_implementations/listconranker.py +10 -4
- mteb/models/model_implementations/llm2clip_models.py +12 -4
- mteb/models/model_implementations/llm2vec_models.py +20 -6
- mteb/models/model_implementations/mcinext_models.py +8 -2
- mteb/models/model_implementations/mdbr_models.py +2 -0
- mteb/models/model_implementations/misc_models.py +63 -0
- mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
- mteb/models/model_implementations/mme5_models.py +2 -1
- mteb/models/model_implementations/moco_models.py +11 -4
- mteb/models/model_implementations/mod_models.py +2 -1
- mteb/models/model_implementations/model2vec_models.py +23 -4
- mteb/models/model_implementations/moka_models.py +3 -0
- mteb/models/model_implementations/nbailab.py +3 -0
- mteb/models/model_implementations/no_instruct_sentence_models.py +13 -5
- mteb/models/model_implementations/nomic_models.py +17 -4
- mteb/models/model_implementations/nomic_models_vision.py +5 -3
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +9 -3
- mteb/models/model_implementations/nvidia_models.py +15 -4
- mteb/models/model_implementations/octen_models.py +3 -1
- mteb/models/model_implementations/openai_models.py +14 -4
- mteb/models/model_implementations/openclip_models.py +17 -4
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +15 -4
- mteb/models/model_implementations/ops_moa_models.py +9 -2
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
- mteb/models/model_implementations/pawan_models.py +1 -0
- mteb/models/model_implementations/piccolo_models.py +2 -0
- mteb/models/model_implementations/promptriever_models.py +16 -6
- mteb/models/model_implementations/pylate_models.py +32 -13
- mteb/models/model_implementations/qodo_models.py +2 -0
- mteb/models/model_implementations/qtack_models.py +1 -0
- mteb/models/model_implementations/qwen3_models.py +11 -1
- mteb/models/model_implementations/qzhou_models.py +2 -0
- mteb/models/model_implementations/random_baseline.py +4 -3
- mteb/models/model_implementations/rasgaard_models.py +1 -0
- mteb/models/model_implementations/reasonir_model.py +65 -0
- mteb/models/model_implementations/repllama_models.py +15 -6
- mteb/models/model_implementations/rerankers_custom.py +13 -4
- mteb/models/model_implementations/rerankers_monot5_based.py +24 -4
- mteb/models/model_implementations/richinfoai_models.py +1 -0
- mteb/models/model_implementations/ru_sentence_models.py +20 -0
- mteb/models/model_implementations/ruri_models.py +10 -0
- mteb/models/model_implementations/salesforce_models.py +10 -1
- mteb/models/model_implementations/samilpwc_models.py +1 -0
- mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
- mteb/models/model_implementations/searchmap_models.py +1 -0
- mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +6 -2
- mteb/models/model_implementations/seed_models.py +2 -1
- mteb/models/model_implementations/sentence_transformers_models.py +18 -0
- mteb/models/model_implementations/shuu_model.py +1 -0
- mteb/models/model_implementations/siglip_models.py +19 -4
- mteb/models/model_implementations/slm_models.py +7 -4
- mteb/models/model_implementations/sonar_models.py +2 -1
- mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
- mteb/models/model_implementations/stella_models.py +6 -0
- mteb/models/model_implementations/tarka_models.py +2 -0
- mteb/models/model_implementations/text2vec_models.py +3 -0
- mteb/models/model_implementations/ua_sentence_models.py +1 -0
- mteb/models/model_implementations/uae_models.py +10 -4
- mteb/models/model_implementations/vdr_models.py +8 -1
- mteb/models/model_implementations/vi_vn_models.py +6 -0
- mteb/models/model_implementations/vista_models.py +11 -4
- mteb/models/model_implementations/vlm2vec_models.py +11 -4
- mteb/models/model_implementations/voyage_models.py +52 -4
- mteb/models/model_implementations/voyage_v.py +11 -6
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +1 -0
- mteb/models/model_implementations/yuan_models.py +1 -0
- mteb/models/model_implementations/yuan_models_en.py +2 -1
- mteb/models/model_meta.py +47 -9
- mteb/models/models_protocols.py +23 -18
- mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
- mteb/models/search_wrappers.py +31 -12
- mteb/models/sentence_transformer_wrapper.py +4 -3
- mteb/models/vllm_wrapper.py +8 -6
- mteb/results/benchmark_results.py +22 -17
- mteb/results/model_result.py +21 -15
- mteb/results/task_result.py +32 -16
- mteb/similarity_functions.py +8 -2
- mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
- mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
- mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
- mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
- mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
- mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
- mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
- mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
- mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
- mteb/tasks/classification/est/estonian_valence.py +1 -1
- mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
- mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
- mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
- mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
- mteb/tasks/classification/fra/french_book_reviews.py +2 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
- mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
- mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
- mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
- mteb/tasks/classification/jpn/wrime_classification.py +1 -1
- mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
- mteb/tasks/classification/kor/klue_tc.py +2 -2
- mteb/tasks/classification/kor/kor_fin.py +1 -1
- mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
- mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
- mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
- mteb/tasks/classification/multilingual/scala_classification.py +1 -1
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
- mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
- mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
- mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
- mteb/tasks/classification/ory/odia_news_classification.py +2 -2
- mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
- mteb/tasks/classification/ron/moroco.py +1 -1
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
- mteb/tasks/classification/rus/georeview_classification.py +1 -1
- mteb/tasks/classification/rus/headline_classification.py +2 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
- mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
- mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
- mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
- mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
- mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
- mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
- mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
- mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
- mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
- mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
- mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nob/snl_clustering.py +8 -3
- mteb/tasks/clustering/nob/vg_clustering.py +8 -3
- mteb/tasks/clustering/pol/polish_clustering.py +3 -3
- mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
- mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
- mteb/tasks/clustering/zho/cmteb_clustering.py +4 -4
- mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
- mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
- mteb/tasks/multichoice/eng/cv_bench.py +4 -4
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
- mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
- mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
- mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
- mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
- mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
- mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
- mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
- mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
- mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
- mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
- mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
- mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
- mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
- mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
- mteb/tasks/pair_classification/rus/terra.py +2 -2
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
- mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
- mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_rag.py +4 -4
- mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
- mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
- mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
- mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
- mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
- mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +1 -1
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +1 -1
- mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
- mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
- mteb/tasks/retrieval/eng/__init__.py +42 -0
- mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
- mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
- mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/ml_questions.py +1 -1
- mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
- mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
- mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
- mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
- mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
- mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
- mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nob/norquad.py +1 -1
- mteb/tasks/retrieval/nob/snl_retrieval.py +1 -1
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
- mteb/tasks/sts/fao/faroese_sts.py +1 -1
- mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
- mteb/tasks/sts/kor/klue_sts.py +1 -1
- mteb/tasks/sts/por/sick_br_sts.py +1 -1
- mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
- mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
- mteb/types/_encoder_io.py +1 -1
- mteb/types/statistics.py +9 -2
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/METADATA +1 -1
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/RECORD +486 -465
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
mteb/abstasks/classification.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
4
|
from collections import defaultdict
|
|
3
|
-
from
|
|
4
|
-
from typing import Any, TypedDict
|
|
5
|
+
from typing import TYPE_CHECKING, Any, TypedDict
|
|
5
6
|
|
|
6
7
|
import numpy as np
|
|
7
8
|
from datasets import Dataset, DatasetDict
|
|
@@ -16,12 +17,8 @@ from sklearn.metrics import (
|
|
|
16
17
|
|
|
17
18
|
from mteb._evaluators.sklearn_evaluator import SklearnEvaluator, SklearnModelProtocol
|
|
18
19
|
from mteb.models import EncoderProtocol, MTEBModels
|
|
19
|
-
from mteb.types import EncodeKwargs, HFSubset, ScoresDict
|
|
20
20
|
from mteb.types.statistics import (
|
|
21
|
-
ImageStatistics,
|
|
22
|
-
LabelStatistics,
|
|
23
21
|
SplitDescriptiveStatistics,
|
|
24
|
-
TextStatistics,
|
|
25
22
|
)
|
|
26
23
|
|
|
27
24
|
from ._statistics_calculation import (
|
|
@@ -31,6 +28,18 @@ from ._statistics_calculation import (
|
|
|
31
28
|
)
|
|
32
29
|
from .abstask import AbsTask
|
|
33
30
|
|
|
31
|
+
if TYPE_CHECKING:
|
|
32
|
+
from pathlib import Path
|
|
33
|
+
|
|
34
|
+
from mteb._evaluators.sklearn_evaluator import SklearnModelProtocol
|
|
35
|
+
from mteb.models import MTEBModels
|
|
36
|
+
from mteb.types import EncodeKwargs, HFSubset, ScoresDict
|
|
37
|
+
from mteb.types.statistics import (
|
|
38
|
+
ImageStatistics,
|
|
39
|
+
LabelStatistics,
|
|
40
|
+
TextStatistics,
|
|
41
|
+
)
|
|
42
|
+
|
|
34
43
|
logger = logging.getLogger(__name__)
|
|
35
44
|
|
|
36
45
|
|
|
@@ -127,6 +136,7 @@ class AbsTaskClassification(AbsTask):
|
|
|
127
136
|
*,
|
|
128
137
|
encode_kwargs: EncodeKwargs,
|
|
129
138
|
prediction_folder: Path | None = None,
|
|
139
|
+
num_proc: int = 1,
|
|
130
140
|
**kwargs: Any,
|
|
131
141
|
) -> dict[HFSubset, ScoresDict]:
|
|
132
142
|
"""Evaluate a model on the classification task.
|
|
@@ -140,7 +150,7 @@ class AbsTaskClassification(AbsTask):
|
|
|
140
150
|
)
|
|
141
151
|
|
|
142
152
|
if not self.data_loaded:
|
|
143
|
-
self.load_data()
|
|
153
|
+
self.load_data(num_proc=num_proc)
|
|
144
154
|
|
|
145
155
|
if self.dataset is None:
|
|
146
156
|
raise RuntimeError("Dataset not loaded.")
|
|
@@ -173,6 +183,7 @@ class AbsTaskClassification(AbsTask):
|
|
|
173
183
|
hf_subset=hf_subset,
|
|
174
184
|
encode_kwargs=encode_kwargs,
|
|
175
185
|
prediction_folder=prediction_folder,
|
|
186
|
+
num_proc=num_proc,
|
|
176
187
|
**kwargs,
|
|
177
188
|
)
|
|
178
189
|
self._add_main_score(scores[hf_subset])
|
|
@@ -188,6 +199,7 @@ class AbsTaskClassification(AbsTask):
|
|
|
188
199
|
hf_split: str,
|
|
189
200
|
hf_subset: str,
|
|
190
201
|
prediction_folder: Path | None = None,
|
|
202
|
+
num_proc: int = 1,
|
|
191
203
|
**kwargs: Any,
|
|
192
204
|
) -> FullClassificationMetrics:
|
|
193
205
|
if not isinstance(model, EncoderProtocol):
|
|
@@ -221,7 +233,10 @@ class AbsTaskClassification(AbsTask):
|
|
|
221
233
|
evaluator_model=self.evaluator_model,
|
|
222
234
|
)
|
|
223
235
|
y_pred, test_cache = evaluator(
|
|
224
|
-
model,
|
|
236
|
+
model,
|
|
237
|
+
encode_kwargs=encode_kwargs,
|
|
238
|
+
test_cache=test_cache,
|
|
239
|
+
num_proc=num_proc,
|
|
225
240
|
)
|
|
226
241
|
if prediction_folder:
|
|
227
242
|
all_predictions.append(y_pred.tolist())
|
|
@@ -363,11 +378,12 @@ class AbsTaskClassification(AbsTask):
|
|
|
363
378
|
label_statistics=label_statistics,
|
|
364
379
|
)
|
|
365
380
|
|
|
366
|
-
def _push_dataset_to_hub(self, repo_name: str) -> None:
|
|
381
|
+
def _push_dataset_to_hub(self, repo_name: str, num_proc: int = 1) -> None:
|
|
367
382
|
self._upload_dataset_to_hub(
|
|
368
383
|
repo_name,
|
|
369
384
|
[
|
|
370
385
|
self.input_column_name,
|
|
371
386
|
self.label_column_name,
|
|
372
387
|
],
|
|
388
|
+
num_proc=num_proc,
|
|
373
389
|
)
|
mteb/abstasks/clustering.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import itertools
|
|
2
4
|
import logging
|
|
3
5
|
import random
|
|
4
6
|
from collections import defaultdict
|
|
5
|
-
from
|
|
6
|
-
from typing import Any, cast
|
|
7
|
+
from typing import TYPE_CHECKING, Any, cast
|
|
7
8
|
|
|
8
9
|
import numpy as np
|
|
9
10
|
from datasets import Dataset, DatasetDict
|
|
@@ -11,13 +12,10 @@ from sklearn.cluster import MiniBatchKMeans
|
|
|
11
12
|
from sklearn.metrics.cluster import v_measure_score
|
|
12
13
|
|
|
13
14
|
from mteb._create_dataloaders import create_dataloader
|
|
14
|
-
from mteb.models import EncoderProtocol
|
|
15
|
-
from mteb.types import Array,
|
|
15
|
+
from mteb.models import EncoderProtocol
|
|
16
|
+
from mteb.types import Array, HFSubset
|
|
16
17
|
from mteb.types.statistics import (
|
|
17
|
-
ImageStatistics,
|
|
18
|
-
LabelStatistics,
|
|
19
18
|
SplitDescriptiveStatistics,
|
|
20
|
-
TextStatistics,
|
|
21
19
|
)
|
|
22
20
|
|
|
23
21
|
from ._statistics_calculation import (
|
|
@@ -27,6 +25,17 @@ from ._statistics_calculation import (
|
|
|
27
25
|
)
|
|
28
26
|
from .abstask import AbsTask
|
|
29
27
|
|
|
28
|
+
if TYPE_CHECKING:
|
|
29
|
+
from pathlib import Path
|
|
30
|
+
|
|
31
|
+
from mteb.models import MTEBModels
|
|
32
|
+
from mteb.types import Array, EncodeKwargs, ScoresDict
|
|
33
|
+
from mteb.types.statistics import (
|
|
34
|
+
ImageStatistics,
|
|
35
|
+
LabelStatistics,
|
|
36
|
+
TextStatistics,
|
|
37
|
+
)
|
|
38
|
+
|
|
30
39
|
logger = logging.getLogger(__name__)
|
|
31
40
|
|
|
32
41
|
|
|
@@ -160,6 +169,7 @@ class AbsTaskClustering(AbsTask):
|
|
|
160
169
|
hf_split: str,
|
|
161
170
|
hf_subset: str,
|
|
162
171
|
prediction_folder: Path | None = None,
|
|
172
|
+
num_proc: int = 1,
|
|
163
173
|
**kwargs: Any,
|
|
164
174
|
) -> ScoresDict:
|
|
165
175
|
if not isinstance(model, EncoderProtocol):
|
|
@@ -186,7 +196,7 @@ class AbsTaskClustering(AbsTask):
|
|
|
186
196
|
self.max_fraction_of_documents_to_embed * len(data_split)
|
|
187
197
|
)
|
|
188
198
|
else:
|
|
189
|
-
max_documents_to_embed = cast(int, self.max_document_to_embed)
|
|
199
|
+
max_documents_to_embed = cast("int", self.max_document_to_embed)
|
|
190
200
|
|
|
191
201
|
max_documents_to_embed = min(len(data_split), max_documents_to_embed)
|
|
192
202
|
example_indices = self.rng_state.sample(
|
|
@@ -204,6 +214,7 @@ class AbsTaskClustering(AbsTask):
|
|
|
204
214
|
downsampled_dataset,
|
|
205
215
|
self.metadata,
|
|
206
216
|
input_column=self.input_column_name,
|
|
217
|
+
num_proc=num_proc,
|
|
207
218
|
**encode_kwargs,
|
|
208
219
|
),
|
|
209
220
|
task_metadata=self.metadata,
|
|
@@ -287,9 +298,11 @@ class AbsTaskClustering(AbsTask):
|
|
|
287
298
|
labels_statistics=label_statistics,
|
|
288
299
|
)
|
|
289
300
|
|
|
290
|
-
def _push_dataset_to_hub(self, repo_name: str) -> None:
|
|
301
|
+
def _push_dataset_to_hub(self, repo_name: str, num_proc: int = 1) -> None:
|
|
291
302
|
self._upload_dataset_to_hub(
|
|
292
|
-
repo_name,
|
|
303
|
+
repo_name,
|
|
304
|
+
[self.input_column_name, self.label_column_name],
|
|
305
|
+
num_proc=num_proc,
|
|
293
306
|
)
|
|
294
307
|
|
|
295
308
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
|
-
from
|
|
3
|
-
from typing import Any, TypedDict
|
|
4
|
+
from typing import TYPE_CHECKING, Any, TypedDict
|
|
4
5
|
|
|
5
6
|
import numpy as np
|
|
6
7
|
from datasets import Dataset
|
|
@@ -9,12 +10,8 @@ from sklearn import metrics
|
|
|
9
10
|
|
|
10
11
|
from mteb._evaluators import ClusteringEvaluator
|
|
11
12
|
from mteb.models import EncoderProtocol, MTEBModels
|
|
12
|
-
from mteb.types import EncodeKwargs, ScoresDict
|
|
13
13
|
from mteb.types.statistics import (
|
|
14
|
-
ImageStatistics,
|
|
15
|
-
LabelStatistics,
|
|
16
14
|
SplitDescriptiveStatistics,
|
|
17
|
-
TextStatistics,
|
|
18
15
|
)
|
|
19
16
|
|
|
20
17
|
from ._statistics_calculation import (
|
|
@@ -24,6 +21,17 @@ from ._statistics_calculation import (
|
|
|
24
21
|
)
|
|
25
22
|
from .abstask import AbsTask
|
|
26
23
|
|
|
24
|
+
if TYPE_CHECKING:
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
|
|
27
|
+
from mteb.models import MTEBModels
|
|
28
|
+
from mteb.types import EncodeKwargs, ScoresDict
|
|
29
|
+
from mteb.types.statistics import (
|
|
30
|
+
ImageStatistics,
|
|
31
|
+
LabelStatistics,
|
|
32
|
+
TextStatistics,
|
|
33
|
+
)
|
|
34
|
+
|
|
27
35
|
logger = logging.getLogger(__name__)
|
|
28
36
|
|
|
29
37
|
|
|
@@ -87,6 +95,7 @@ class AbsTaskClusteringLegacy(AbsTask):
|
|
|
87
95
|
hf_split: str,
|
|
88
96
|
hf_subset: str,
|
|
89
97
|
prediction_folder: Path | None = None,
|
|
98
|
+
num_proc: int = 1,
|
|
90
99
|
**kwargs: Any,
|
|
91
100
|
) -> ScoresDict:
|
|
92
101
|
if not isinstance(model, EncoderProtocol):
|
|
@@ -151,7 +160,11 @@ class AbsTaskClusteringLegacy(AbsTask):
|
|
|
151
160
|
hf_subset=hf_subset,
|
|
152
161
|
**kwargs,
|
|
153
162
|
)
|
|
154
|
-
evaluate_clusters = evaluator(
|
|
163
|
+
evaluate_clusters = evaluator(
|
|
164
|
+
model,
|
|
165
|
+
encode_kwargs=encode_kwargs,
|
|
166
|
+
num_proc=num_proc,
|
|
167
|
+
)
|
|
155
168
|
if prediction_folder:
|
|
156
169
|
self._save_task_predictions(
|
|
157
170
|
evaluate_clusters,
|
|
@@ -230,11 +243,12 @@ class AbsTaskClusteringLegacy(AbsTask):
|
|
|
230
243
|
label_statistics=label_statistics,
|
|
231
244
|
)
|
|
232
245
|
|
|
233
|
-
def _push_dataset_to_hub(self, repo_name: str) -> None:
|
|
246
|
+
def _push_dataset_to_hub(self, repo_name: str, num_proc: int = 1) -> None:
|
|
234
247
|
self._upload_dataset_to_hub(
|
|
235
248
|
repo_name,
|
|
236
249
|
[
|
|
237
250
|
self.input_column_name,
|
|
238
251
|
self.label_column_name,
|
|
239
252
|
],
|
|
253
|
+
num_proc=num_proc,
|
|
240
254
|
)
|
|
@@ -1,10 +1,11 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
4
|
from collections.abc import Sequence
|
|
3
|
-
from
|
|
4
|
-
from typing import Any, TypedDict
|
|
5
|
+
from typing import TYPE_CHECKING, Any, TypedDict
|
|
5
6
|
|
|
6
7
|
import torch
|
|
7
|
-
from datasets import
|
|
8
|
+
from datasets import concatenate_datasets
|
|
8
9
|
|
|
9
10
|
from mteb._evaluators import ImageTextPairClassificationEvaluator
|
|
10
11
|
from mteb.abstasks._statistics_calculation import (
|
|
@@ -12,14 +13,23 @@ from mteb.abstasks._statistics_calculation import (
|
|
|
12
13
|
calculate_text_statistics,
|
|
13
14
|
)
|
|
14
15
|
from mteb.abstasks.abstask import AbsTask
|
|
15
|
-
from mteb.models.models_protocols import EncoderProtocol
|
|
16
|
-
from mteb.types import EncodeKwargs
|
|
16
|
+
from mteb.models.models_protocols import EncoderProtocol
|
|
17
17
|
from mteb.types.statistics import (
|
|
18
|
-
ImageStatistics,
|
|
19
18
|
SplitDescriptiveStatistics,
|
|
20
|
-
TextStatistics,
|
|
21
19
|
)
|
|
22
20
|
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
|
|
24
|
+
from datasets import Dataset
|
|
25
|
+
|
|
26
|
+
from mteb.models.models_protocols import MTEBModels
|
|
27
|
+
from mteb.types import EncodeKwargs
|
|
28
|
+
from mteb.types.statistics import (
|
|
29
|
+
ImageStatistics,
|
|
30
|
+
TextStatistics,
|
|
31
|
+
)
|
|
32
|
+
|
|
23
33
|
logger = logging.getLogger(__name__)
|
|
24
34
|
|
|
25
35
|
|
|
@@ -124,6 +134,7 @@ class AbsTaskImageTextPairClassification(AbsTask):
|
|
|
124
134
|
hf_split: str,
|
|
125
135
|
hf_subset: str,
|
|
126
136
|
prediction_folder: Path | None = None,
|
|
137
|
+
num_proc: int = 1,
|
|
127
138
|
**kwargs: Any,
|
|
128
139
|
) -> ImageTextPairClassificationMetrics:
|
|
129
140
|
if not isinstance(model, EncoderProtocol):
|
|
@@ -157,7 +168,9 @@ class AbsTaskImageTextPairClassification(AbsTask):
|
|
|
157
168
|
hf_subset=hf_subset,
|
|
158
169
|
**kwargs,
|
|
159
170
|
)
|
|
160
|
-
scores: list[torch.Tensor] = evaluator(
|
|
171
|
+
scores: list[torch.Tensor] = evaluator(
|
|
172
|
+
model, encode_kwargs=encode_kwargs, num_proc=num_proc
|
|
173
|
+
) # type: ignore[assignment]
|
|
161
174
|
if prediction_folder:
|
|
162
175
|
self._save_task_predictions(
|
|
163
176
|
[score.tolist() for score in scores],
|
|
@@ -205,7 +218,7 @@ class AbsTaskImageTextPairClassification(AbsTask):
|
|
|
205
218
|
accuracy=torch.Tensor(all_correct_scores).float().mean().item(),
|
|
206
219
|
)
|
|
207
220
|
|
|
208
|
-
def _push_dataset_to_hub(self, repo_name: str) -> None:
|
|
221
|
+
def _push_dataset_to_hub(self, repo_name: str, num_proc: int = 1) -> None:
|
|
209
222
|
text_columns = (
|
|
210
223
|
[self.texts_column_names]
|
|
211
224
|
if isinstance(self.texts_column_names, str)
|
|
@@ -220,4 +233,5 @@ class AbsTaskImageTextPairClassification(AbsTask):
|
|
|
220
233
|
self._upload_dataset_to_hub(
|
|
221
234
|
repo_name,
|
|
222
235
|
[*text_columns, *image_columns],
|
|
236
|
+
num_proc=num_proc,
|
|
223
237
|
)
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import itertools
|
|
2
4
|
import logging
|
|
3
5
|
from collections import defaultdict
|
|
4
|
-
from
|
|
5
|
-
from typing import Any, TypedDict
|
|
6
|
+
from typing import TYPE_CHECKING, Any, TypedDict
|
|
6
7
|
|
|
7
8
|
import numpy as np
|
|
8
9
|
from datasets import DatasetDict
|
|
@@ -15,12 +16,17 @@ from typing_extensions import override
|
|
|
15
16
|
|
|
16
17
|
from mteb._create_dataloaders import create_dataloader
|
|
17
18
|
from mteb._evaluators.classification_metrics import hamming_score
|
|
18
|
-
from mteb.
|
|
19
|
-
from mteb.models import EncoderProtocol, MTEBModels
|
|
20
|
-
from mteb.types import Array, EncodeKwargs
|
|
19
|
+
from mteb.models import EncoderProtocol
|
|
21
20
|
|
|
22
21
|
from .classification import AbsTaskClassification
|
|
23
22
|
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
from mteb._evaluators.sklearn_evaluator import SklearnModelProtocol
|
|
27
|
+
from mteb.models import MTEBModels
|
|
28
|
+
from mteb.types import Array, EncodeKwargs
|
|
29
|
+
|
|
24
30
|
logger = logging.getLogger(__name__)
|
|
25
31
|
|
|
26
32
|
|
|
@@ -87,6 +93,7 @@ class AbsTaskMultilabelClassification(AbsTaskClassification):
|
|
|
87
93
|
hf_split: str,
|
|
88
94
|
hf_subset: str,
|
|
89
95
|
prediction_folder: Path | None = None,
|
|
96
|
+
num_proc: int = 1,
|
|
90
97
|
**kwargs: Any,
|
|
91
98
|
) -> FullMultilabelClassificationMetrics:
|
|
92
99
|
if not isinstance(model, EncoderProtocol):
|
|
@@ -119,6 +126,7 @@ class AbsTaskMultilabelClassification(AbsTaskClassification):
|
|
|
119
126
|
unique_train_dataset,
|
|
120
127
|
self.metadata,
|
|
121
128
|
input_column=self.input_column_name,
|
|
129
|
+
num_proc=num_proc,
|
|
122
130
|
**encode_kwargs,
|
|
123
131
|
)
|
|
124
132
|
|
|
@@ -1,16 +1,15 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import hashlib
|
|
2
4
|
import logging
|
|
3
5
|
from collections import defaultdict
|
|
4
|
-
from
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
5
7
|
|
|
6
8
|
import numpy as np
|
|
7
9
|
from datasets import Dataset
|
|
8
10
|
from sklearn.metrics import average_precision_score
|
|
9
11
|
|
|
10
12
|
from mteb._evaluators import PairClassificationEvaluator
|
|
11
|
-
from mteb._evaluators.pair_classification_evaluator import (
|
|
12
|
-
PairClassificationDistances,
|
|
13
|
-
)
|
|
14
13
|
from mteb.abstasks._statistics_calculation import (
|
|
15
14
|
calculate_image_statistics,
|
|
16
15
|
calculate_label_statistics,
|
|
@@ -18,15 +17,26 @@ from mteb.abstasks._statistics_calculation import (
|
|
|
18
17
|
)
|
|
19
18
|
from mteb.abstasks.abstask import AbsTask
|
|
20
19
|
from mteb.models.model_meta import ScoringFunction
|
|
21
|
-
from mteb.models.models_protocols import EncoderProtocol
|
|
22
|
-
from mteb.types import EncodeKwargs, PromptType
|
|
20
|
+
from mteb.models.models_protocols import EncoderProtocol
|
|
23
21
|
from mteb.types.statistics import (
|
|
24
|
-
ImageStatistics,
|
|
25
|
-
LabelStatistics,
|
|
26
22
|
SplitDescriptiveStatistics,
|
|
27
|
-
TextStatistics,
|
|
28
23
|
)
|
|
29
24
|
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
|
|
28
|
+
from mteb._evaluators.pair_classification_evaluator import (
|
|
29
|
+
PairClassificationDistances,
|
|
30
|
+
)
|
|
31
|
+
from mteb.models.models_protocols import MTEBModels
|
|
32
|
+
from mteb.types import EncodeKwargs, PromptType
|
|
33
|
+
from mteb.types.statistics import (
|
|
34
|
+
ImageStatistics,
|
|
35
|
+
LabelStatistics,
|
|
36
|
+
TextStatistics,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
|
|
30
40
|
logger = logging.getLogger(__name__)
|
|
31
41
|
|
|
32
42
|
|
|
@@ -86,6 +96,7 @@ class AbsTaskPairClassification(AbsTask):
|
|
|
86
96
|
hf_subset: str,
|
|
87
97
|
encode_kwargs: EncodeKwargs,
|
|
88
98
|
prediction_folder: Path | None = None,
|
|
99
|
+
num_proc: int = 1,
|
|
89
100
|
**kwargs,
|
|
90
101
|
) -> dict[str, float]:
|
|
91
102
|
if not isinstance(model, EncoderProtocol):
|
|
@@ -105,7 +116,11 @@ class AbsTaskPairClassification(AbsTask):
|
|
|
105
116
|
input2_prompt_type=self.input2_prompt_type,
|
|
106
117
|
**kwargs,
|
|
107
118
|
)
|
|
108
|
-
similarity_scores = evaluator(
|
|
119
|
+
similarity_scores = evaluator(
|
|
120
|
+
model,
|
|
121
|
+
encode_kwargs=encode_kwargs,
|
|
122
|
+
num_proc=num_proc,
|
|
123
|
+
)
|
|
109
124
|
|
|
110
125
|
if prediction_folder:
|
|
111
126
|
self._save_task_predictions(
|
|
@@ -238,7 +253,7 @@ class AbsTaskPairClassification(AbsTask):
|
|
|
238
253
|
labels_statistics=calculate_label_statistics(labels),
|
|
239
254
|
)
|
|
240
255
|
|
|
241
|
-
def _push_dataset_to_hub(self, repo_name: str) -> None:
|
|
256
|
+
def _push_dataset_to_hub(self, repo_name: str, num_proc: int = 1) -> None:
|
|
242
257
|
# previously pair classification datasets were stored in a single row
|
|
243
258
|
if self.dataset is None:
|
|
244
259
|
# overall this shouldn't happen as we check for dataset before pushing to hub
|
|
@@ -262,6 +277,7 @@ class AbsTaskPairClassification(AbsTask):
|
|
|
262
277
|
self.input2_column_name,
|
|
263
278
|
self.label_column_name,
|
|
264
279
|
],
|
|
280
|
+
num_proc=num_proc,
|
|
265
281
|
)
|
|
266
282
|
|
|
267
283
|
def _compute_metrics_values(
|
mteb/abstasks/regression.py
CHANGED
|
@@ -1,29 +1,37 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
|
-
from typing import TypedDict
|
|
4
|
+
from typing import TYPE_CHECKING, TypedDict
|
|
3
5
|
|
|
4
6
|
import datasets
|
|
5
7
|
import numpy as np
|
|
6
8
|
import pandas as pd
|
|
7
|
-
from datasets import Dataset
|
|
8
9
|
from scipy.stats import kendalltau
|
|
9
10
|
from sklearn.linear_model import LinearRegression
|
|
10
11
|
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
|
|
11
12
|
|
|
12
|
-
from mteb._evaluators.sklearn_evaluator import SklearnEvaluator
|
|
13
|
+
from mteb._evaluators.sklearn_evaluator import SklearnEvaluator
|
|
13
14
|
from mteb.abstasks._statistics_calculation import (
|
|
14
15
|
calculate_image_statistics,
|
|
15
16
|
calculate_score_statistics,
|
|
16
17
|
calculate_text_statistics,
|
|
17
18
|
)
|
|
18
19
|
from mteb.types.statistics import (
|
|
19
|
-
ImageStatistics,
|
|
20
|
-
ScoreStatistics,
|
|
21
20
|
SplitDescriptiveStatistics,
|
|
22
|
-
TextStatistics,
|
|
23
21
|
)
|
|
24
22
|
|
|
25
23
|
from .classification import AbsTaskClassification
|
|
26
24
|
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
from datasets import Dataset
|
|
27
|
+
|
|
28
|
+
from mteb._evaluators.sklearn_evaluator import SklearnModelProtocol
|
|
29
|
+
from mteb.types.statistics import (
|
|
30
|
+
ImageStatistics,
|
|
31
|
+
ScoreStatistics,
|
|
32
|
+
TextStatistics,
|
|
33
|
+
)
|
|
34
|
+
|
|
27
35
|
logger = logging.getLogger(__name__)
|
|
28
36
|
|
|
29
37
|
|