mteb 2.7.2__py3-none-any.whl → 2.7.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/_create_dataloaders.py +63 -14
- mteb/_evaluators/any_sts_evaluator.py +12 -5
- mteb/_evaluators/clustering_evaluator.py +12 -4
- mteb/_evaluators/evaluator.py +11 -5
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +14 -5
- mteb/_evaluators/pair_classification_evaluator.py +13 -5
- mteb/_evaluators/retrieval_evaluator.py +22 -13
- mteb/_evaluators/retrieval_metrics.py +9 -3
- mteb/_evaluators/sklearn_evaluator.py +20 -11
- mteb/_evaluators/text/bitext_mining_evaluator.py +10 -3
- mteb/_evaluators/text/summarization_evaluator.py +10 -4
- mteb/_evaluators/zeroshot_classification_evaluator.py +12 -3
- mteb/_helpful_enum.py +5 -1
- mteb/abstasks/_data_filter/filters.py +8 -2
- mteb/abstasks/_data_filter/task_pipelines.py +7 -2
- mteb/abstasks/_statistics_calculation.py +6 -4
- mteb/abstasks/abstask.py +48 -21
- mteb/abstasks/aggregate_task_metadata.py +20 -9
- mteb/abstasks/aggregated_task.py +15 -8
- mteb/abstasks/classification.py +25 -9
- mteb/abstasks/clustering.py +23 -10
- mteb/abstasks/clustering_legacy.py +22 -8
- mteb/abstasks/image/image_text_pair_classification.py +23 -9
- mteb/abstasks/multilabel_classification.py +13 -5
- mteb/abstasks/pair_classification.py +27 -11
- mteb/abstasks/regression.py +14 -6
- mteb/abstasks/retrieval.py +56 -30
- mteb/abstasks/retrieval_dataset_loaders.py +48 -37
- mteb/abstasks/sts.py +29 -13
- mteb/abstasks/task_metadata.py +17 -8
- mteb/abstasks/text/bitext_mining.py +23 -12
- mteb/abstasks/text/reranking.py +2 -2
- mteb/abstasks/text/summarization.py +19 -8
- mteb/abstasks/zeroshot_classification.py +23 -9
- mteb/benchmarks/_create_table.py +13 -7
- mteb/benchmarks/benchmark.py +11 -1
- mteb/benchmarks/benchmarks/__init__.py +2 -0
- mteb/benchmarks/benchmarks/benchmarks.py +41 -2
- mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
- mteb/cache.py +10 -5
- mteb/cli/_display_tasks.py +9 -3
- mteb/cli/build_cli.py +5 -2
- mteb/cli/generate_model_card.py +9 -2
- mteb/deprecated_evaluator.py +16 -12
- mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
- mteb/evaluate.py +33 -20
- mteb/filter_tasks.py +12 -7
- mteb/get_tasks.py +9 -4
- mteb/languages/language_scripts.py +8 -3
- mteb/leaderboard/app.py +11 -4
- mteb/leaderboard/table.py +7 -2
- mteb/load_results.py +9 -3
- mteb/models/abs_encoder.py +22 -12
- mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
- mteb/models/cache_wrappers/cache_wrapper.py +14 -9
- mteb/models/get_model_meta.py +32 -6
- mteb/models/instruct_wrapper.py +13 -5
- mteb/models/model_implementations/align_models.py +10 -4
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +2 -0
- mteb/models/model_implementations/ara_models.py +1 -0
- mteb/models/model_implementations/arctic_models.py +8 -0
- mteb/models/model_implementations/b1ade_models.py +1 -0
- mteb/models/model_implementations/bedrock_models.py +20 -6
- mteb/models/model_implementations/bge_models.py +40 -1
- mteb/models/model_implementations/bica_model.py +1 -0
- mteb/models/model_implementations/blip2_models.py +11 -4
- mteb/models/model_implementations/blip_models.py +17 -4
- mteb/models/model_implementations/bm25.py +24 -14
- mteb/models/model_implementations/bmretriever_models.py +10 -2
- mteb/models/model_implementations/cadet_models.py +1 -0
- mteb/models/model_implementations/cde_models.py +11 -5
- mteb/models/model_implementations/clip_models.py +12 -4
- mteb/models/model_implementations/clips_models.py +3 -0
- mteb/models/model_implementations/codefuse_models.py +5 -0
- mteb/models/model_implementations/codesage_models.py +3 -0
- mteb/models/model_implementations/cohere_models.py +14 -4
- mteb/models/model_implementations/cohere_v.py +14 -4
- mteb/models/model_implementations/colpali_models.py +7 -3
- mteb/models/model_implementations/colqwen_models.py +17 -31
- mteb/models/model_implementations/colsmol_models.py +3 -1
- mteb/models/model_implementations/conan_models.py +11 -4
- mteb/models/model_implementations/dino_models.py +28 -4
- mteb/models/model_implementations/e5_instruct.py +4 -0
- mteb/models/model_implementations/e5_models.py +9 -0
- mteb/models/model_implementations/e5_v.py +10 -4
- mteb/models/model_implementations/eagerworks_models.py +11 -4
- mteb/models/model_implementations/emillykkejensen_models.py +3 -0
- mteb/models/model_implementations/en_code_retriever.py +1 -0
- mteb/models/model_implementations/euler_models.py +1 -0
- mteb/models/model_implementations/evaclip_models.py +13 -4
- mteb/models/model_implementations/fa_models.py +9 -0
- mteb/models/model_implementations/facebookai.py +2 -0
- mteb/models/model_implementations/geogpt_models.py +1 -0
- mteb/models/model_implementations/gme_v_models.py +7 -3
- mteb/models/model_implementations/google_models.py +15 -4
- mteb/models/model_implementations/granite_vision_embedding_models.py +7 -5
- mteb/models/model_implementations/gritlm_models.py +3 -0
- mteb/models/model_implementations/gte_models.py +9 -0
- mteb/models/model_implementations/hinvec_models.py +6 -1
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +6 -0
- mteb/models/model_implementations/inf_models.py +2 -0
- mteb/models/model_implementations/jasper_models.py +14 -5
- mteb/models/model_implementations/jina_clip.py +10 -4
- mteb/models/model_implementations/jina_models.py +17 -5
- mteb/models/model_implementations/kalm_models.py +24 -12
- mteb/models/model_implementations/kblab.py +1 -0
- mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
- mteb/models/model_implementations/kfst.py +1 -0
- mteb/models/model_implementations/kowshik24_models.py +1 -0
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +1 -0
- mteb/models/model_implementations/linq_models.py +7 -1
- mteb/models/model_implementations/listconranker.py +10 -4
- mteb/models/model_implementations/llm2clip_models.py +12 -4
- mteb/models/model_implementations/llm2vec_models.py +20 -6
- mteb/models/model_implementations/mcinext_models.py +8 -2
- mteb/models/model_implementations/mdbr_models.py +2 -0
- mteb/models/model_implementations/misc_models.py +63 -0
- mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
- mteb/models/model_implementations/mme5_models.py +2 -1
- mteb/models/model_implementations/moco_models.py +11 -4
- mteb/models/model_implementations/mod_models.py +2 -1
- mteb/models/model_implementations/model2vec_models.py +23 -4
- mteb/models/model_implementations/moka_models.py +3 -0
- mteb/models/model_implementations/nbailab.py +3 -0
- mteb/models/model_implementations/no_instruct_sentence_models.py +13 -5
- mteb/models/model_implementations/nomic_models.py +17 -4
- mteb/models/model_implementations/nomic_models_vision.py +5 -3
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +9 -3
- mteb/models/model_implementations/nvidia_models.py +15 -4
- mteb/models/model_implementations/octen_models.py +3 -1
- mteb/models/model_implementations/openai_models.py +14 -4
- mteb/models/model_implementations/openclip_models.py +17 -4
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +15 -4
- mteb/models/model_implementations/ops_moa_models.py +9 -2
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
- mteb/models/model_implementations/pawan_models.py +1 -0
- mteb/models/model_implementations/piccolo_models.py +2 -0
- mteb/models/model_implementations/promptriever_models.py +16 -6
- mteb/models/model_implementations/pylate_models.py +32 -13
- mteb/models/model_implementations/qodo_models.py +2 -0
- mteb/models/model_implementations/qtack_models.py +1 -0
- mteb/models/model_implementations/qwen3_models.py +11 -1
- mteb/models/model_implementations/qzhou_models.py +2 -0
- mteb/models/model_implementations/random_baseline.py +4 -3
- mteb/models/model_implementations/rasgaard_models.py +1 -0
- mteb/models/model_implementations/reasonir_model.py +65 -0
- mteb/models/model_implementations/repllama_models.py +15 -6
- mteb/models/model_implementations/rerankers_custom.py +13 -4
- mteb/models/model_implementations/rerankers_monot5_based.py +24 -4
- mteb/models/model_implementations/richinfoai_models.py +1 -0
- mteb/models/model_implementations/ru_sentence_models.py +20 -0
- mteb/models/model_implementations/ruri_models.py +10 -0
- mteb/models/model_implementations/salesforce_models.py +10 -1
- mteb/models/model_implementations/samilpwc_models.py +1 -0
- mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
- mteb/models/model_implementations/searchmap_models.py +1 -0
- mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +6 -2
- mteb/models/model_implementations/seed_models.py +2 -1
- mteb/models/model_implementations/sentence_transformers_models.py +18 -0
- mteb/models/model_implementations/shuu_model.py +1 -0
- mteb/models/model_implementations/siglip_models.py +19 -4
- mteb/models/model_implementations/slm_models.py +7 -4
- mteb/models/model_implementations/sonar_models.py +2 -1
- mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
- mteb/models/model_implementations/stella_models.py +6 -0
- mteb/models/model_implementations/tarka_models.py +2 -0
- mteb/models/model_implementations/text2vec_models.py +3 -0
- mteb/models/model_implementations/ua_sentence_models.py +1 -0
- mteb/models/model_implementations/uae_models.py +10 -4
- mteb/models/model_implementations/vdr_models.py +8 -1
- mteb/models/model_implementations/vi_vn_models.py +6 -0
- mteb/models/model_implementations/vista_models.py +11 -4
- mteb/models/model_implementations/vlm2vec_models.py +11 -4
- mteb/models/model_implementations/voyage_models.py +52 -4
- mteb/models/model_implementations/voyage_v.py +11 -6
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +1 -0
- mteb/models/model_implementations/yuan_models.py +1 -0
- mteb/models/model_implementations/yuan_models_en.py +2 -1
- mteb/models/model_meta.py +47 -9
- mteb/models/models_protocols.py +23 -18
- mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
- mteb/models/search_wrappers.py +31 -12
- mteb/models/sentence_transformer_wrapper.py +4 -3
- mteb/models/vllm_wrapper.py +8 -6
- mteb/results/benchmark_results.py +22 -17
- mteb/results/model_result.py +21 -15
- mteb/results/task_result.py +32 -16
- mteb/similarity_functions.py +8 -2
- mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
- mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
- mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
- mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
- mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
- mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
- mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
- mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
- mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
- mteb/tasks/classification/est/estonian_valence.py +1 -1
- mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
- mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
- mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
- mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
- mteb/tasks/classification/fra/french_book_reviews.py +2 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
- mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
- mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
- mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
- mteb/tasks/classification/jpn/wrime_classification.py +1 -1
- mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
- mteb/tasks/classification/kor/klue_tc.py +2 -2
- mteb/tasks/classification/kor/kor_fin.py +1 -1
- mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
- mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
- mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
- mteb/tasks/classification/multilingual/scala_classification.py +1 -1
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
- mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
- mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
- mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
- mteb/tasks/classification/ory/odia_news_classification.py +2 -2
- mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
- mteb/tasks/classification/ron/moroco.py +1 -1
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
- mteb/tasks/classification/rus/georeview_classification.py +1 -1
- mteb/tasks/classification/rus/headline_classification.py +2 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
- mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
- mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
- mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
- mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
- mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
- mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
- mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
- mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
- mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
- mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
- mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nob/snl_clustering.py +8 -3
- mteb/tasks/clustering/nob/vg_clustering.py +8 -3
- mteb/tasks/clustering/pol/polish_clustering.py +3 -3
- mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
- mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
- mteb/tasks/clustering/zho/cmteb_clustering.py +4 -4
- mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
- mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
- mteb/tasks/multichoice/eng/cv_bench.py +4 -4
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
- mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
- mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
- mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
- mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
- mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
- mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
- mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
- mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
- mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
- mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
- mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
- mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
- mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
- mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
- mteb/tasks/pair_classification/rus/terra.py +2 -2
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
- mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
- mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_rag.py +4 -4
- mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
- mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
- mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
- mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
- mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
- mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +1 -1
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +1 -1
- mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
- mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
- mteb/tasks/retrieval/eng/__init__.py +42 -0
- mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
- mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
- mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/ml_questions.py +1 -1
- mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
- mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
- mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
- mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
- mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
- mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
- mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nob/norquad.py +1 -1
- mteb/tasks/retrieval/nob/snl_retrieval.py +1 -1
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
- mteb/tasks/sts/fao/faroese_sts.py +1 -1
- mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
- mteb/tasks/sts/kor/klue_sts.py +1 -1
- mteb/tasks/sts/por/sick_br_sts.py +1 -1
- mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
- mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
- mteb/types/_encoder_io.py +1 -1
- mteb/types/statistics.py +9 -2
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/METADATA +1 -1
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/RECORD +486 -465
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
mteb/_create_dataloaders.py
CHANGED
|
@@ -1,21 +1,28 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
4
|
import warnings
|
|
3
|
-
from
|
|
4
|
-
from typing import Any, cast
|
|
5
|
+
from typing import TYPE_CHECKING, Any, cast
|
|
5
6
|
|
|
6
7
|
import torch
|
|
7
8
|
from datasets import Dataset, Image
|
|
8
9
|
from torch.utils.data import DataLoader, default_collate
|
|
9
10
|
|
|
10
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
11
11
|
from mteb.types import (
|
|
12
|
-
BatchedInput,
|
|
13
|
-
Conversation,
|
|
14
12
|
ConversationTurn,
|
|
15
13
|
PromptType,
|
|
16
|
-
QueryDatasetType,
|
|
17
14
|
)
|
|
18
|
-
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from collections.abc import Callable
|
|
18
|
+
|
|
19
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
20
|
+
from mteb.types import (
|
|
21
|
+
BatchedInput,
|
|
22
|
+
Conversation,
|
|
23
|
+
QueryDatasetType,
|
|
24
|
+
)
|
|
25
|
+
from mteb.types._encoder_io import CorpusInput, ImageInput, QueryInput, TextInput
|
|
19
26
|
|
|
20
27
|
logger = logging.getLogger(__name__)
|
|
21
28
|
|
|
@@ -23,6 +30,7 @@ logger = logging.getLogger(__name__)
|
|
|
23
30
|
def _create_dataloader_from_texts(
|
|
24
31
|
text: list[str],
|
|
25
32
|
batch_size: int = 32,
|
|
33
|
+
num_proc: int = 1,
|
|
26
34
|
**kwargs: Any,
|
|
27
35
|
) -> DataLoader[TextInput]:
|
|
28
36
|
"""Create a dataloader from a list of text.
|
|
@@ -30,15 +38,17 @@ def _create_dataloader_from_texts(
|
|
|
30
38
|
Args:
|
|
31
39
|
text: A list of text to create a dataloader from.
|
|
32
40
|
batch_size: Batch size for the dataloader.
|
|
41
|
+
num_proc: Number of processes to use.
|
|
33
42
|
kwargs: Not used, present catching extra arguments.
|
|
34
43
|
|
|
35
44
|
Returns:
|
|
36
45
|
A dataloader with the text.
|
|
37
46
|
"""
|
|
38
47
|
dataset = Dataset.from_dict({"text": text})
|
|
39
|
-
return
|
|
48
|
+
return DataLoader(
|
|
40
49
|
dataset,
|
|
41
50
|
batch_size=batch_size,
|
|
51
|
+
num_workers=num_proc if num_proc > 1 else 0,
|
|
42
52
|
)
|
|
43
53
|
|
|
44
54
|
|
|
@@ -64,20 +74,27 @@ def _corpus_to_dict(
|
|
|
64
74
|
def _create_dataloader_for_retrieval_corpus(
|
|
65
75
|
dataset: Dataset,
|
|
66
76
|
batch_size: int = 32,
|
|
77
|
+
num_proc: int = 1,
|
|
67
78
|
) -> DataLoader[CorpusInput]:
|
|
68
79
|
"""Create a dataloader from a corpus.
|
|
69
80
|
|
|
70
81
|
Args:
|
|
71
82
|
dataset: Corpus
|
|
72
83
|
batch_size: Batch size for the dataloader.
|
|
84
|
+
num_proc: Number of processes to use.
|
|
73
85
|
|
|
74
86
|
Returns:
|
|
75
87
|
A dataloader with the corpus.
|
|
76
88
|
"""
|
|
77
|
-
new_ds = dataset.map(
|
|
78
|
-
|
|
89
|
+
new_ds = dataset.map(
|
|
90
|
+
_corpus_to_dict,
|
|
91
|
+
desc="Converting corpus dict",
|
|
92
|
+
num_proc=num_proc,
|
|
93
|
+
)
|
|
94
|
+
return DataLoader(
|
|
79
95
|
new_ds,
|
|
80
96
|
batch_size=batch_size,
|
|
97
|
+
num_workers=num_proc if num_proc > 1 else 0,
|
|
81
98
|
)
|
|
82
99
|
|
|
83
100
|
|
|
@@ -94,12 +111,14 @@ def _combine_queries_with_instruction_text(row: dict[str, str]) -> dict[str, str
|
|
|
94
111
|
def _create_text_dataloader_for_queries(
|
|
95
112
|
queries: QueryDatasetType,
|
|
96
113
|
batch_size: int = 32,
|
|
114
|
+
num_proc: int = 1,
|
|
97
115
|
) -> DataLoader[QueryInput]:
|
|
98
116
|
"""Create a dataloader from a list of queries.
|
|
99
117
|
|
|
100
118
|
Args:
|
|
101
119
|
queries: A list of queries.
|
|
102
120
|
batch_size: Batch size for the dataloader.
|
|
121
|
+
num_proc: Number of processes to use.
|
|
103
122
|
|
|
104
123
|
Returns:
|
|
105
124
|
A dataloader with the queries.
|
|
@@ -107,10 +126,12 @@ def _create_text_dataloader_for_queries(
|
|
|
107
126
|
queries = queries.map(
|
|
108
127
|
_combine_queries_with_instruction_text,
|
|
109
128
|
desc="Processing queries for dataloading",
|
|
129
|
+
num_proc=num_proc,
|
|
110
130
|
)
|
|
111
|
-
return
|
|
131
|
+
return DataLoader(
|
|
112
132
|
queries,
|
|
113
133
|
batch_size=batch_size,
|
|
134
|
+
num_workers=num_proc if num_proc > 1 else 0,
|
|
114
135
|
)
|
|
115
136
|
|
|
116
137
|
|
|
@@ -128,7 +149,7 @@ def _convert_conv_history_to_query(
|
|
|
128
149
|
conversation = row["text"]
|
|
129
150
|
# if it's a list of strings, just join them
|
|
130
151
|
if isinstance(conversation, list) and isinstance(conversation[0], str):
|
|
131
|
-
conversation_ = cast(list[str], conversation)
|
|
152
|
+
conversation_ = cast("list[str]", conversation)
|
|
132
153
|
conv_str = "; ".join(conversation_)
|
|
133
154
|
current_conversation = [
|
|
134
155
|
ConversationTurn(role="user", content=message) for message in conversation_
|
|
@@ -173,18 +194,20 @@ def _convert_conv_history_to_query(
|
|
|
173
194
|
|
|
174
195
|
row["text"] = conv_str
|
|
175
196
|
row["conversation"] = current_conversation
|
|
176
|
-
return cast(dict[str, str | list[ConversationTurn]], row)
|
|
197
|
+
return cast("dict[str, str | list[ConversationTurn]]", row)
|
|
177
198
|
|
|
178
199
|
|
|
179
200
|
def _create_dataloader_for_queries_conversation(
|
|
180
201
|
queries: QueryDatasetType,
|
|
181
202
|
batch_size: int = 32,
|
|
203
|
+
num_proc: int = 1,
|
|
182
204
|
) -> DataLoader[QueryInput]:
|
|
183
205
|
"""Create a dataloader from a list of queries.
|
|
184
206
|
|
|
185
207
|
Args:
|
|
186
208
|
queries: A list of queries.
|
|
187
209
|
batch_size: Batch size for the dataloader.
|
|
210
|
+
num_proc: Number of processes to use.
|
|
188
211
|
|
|
189
212
|
Returns:
|
|
190
213
|
A dataloader with the queries.
|
|
@@ -193,9 +216,11 @@ def _create_dataloader_for_queries_conversation(
|
|
|
193
216
|
queries.map(
|
|
194
217
|
_convert_conv_history_to_query,
|
|
195
218
|
desc="Converting conversations to queries",
|
|
219
|
+
num_proc=num_proc,
|
|
196
220
|
),
|
|
197
221
|
collate_fn=_custom_collate_fn,
|
|
198
222
|
batch_size=batch_size,
|
|
223
|
+
num_workers=num_proc if num_proc > 1 else 0,
|
|
199
224
|
)
|
|
200
225
|
|
|
201
226
|
|
|
@@ -240,6 +265,7 @@ def _prepare_image_dataset(
|
|
|
240
265
|
dataset: Dataset,
|
|
241
266
|
image_column_name: str | None = None,
|
|
242
267
|
transform: Callable[[Any], Any] | None = None,
|
|
268
|
+
num_proc: int = 1,
|
|
243
269
|
) -> Dataset:
|
|
244
270
|
"""Prepare the image dataset by converting images to RGB and applying transformations."""
|
|
245
271
|
if (
|
|
@@ -255,6 +281,7 @@ def _prepare_image_dataset(
|
|
|
255
281
|
_convert_images_to_rgb,
|
|
256
282
|
fn_kwargs={"image_col_name": "image", "transform": transform},
|
|
257
283
|
desc="Converting images to RGB",
|
|
284
|
+
num_proc=num_proc,
|
|
258
285
|
)
|
|
259
286
|
|
|
260
287
|
|
|
@@ -288,6 +315,7 @@ def _create_image_dataloader(
|
|
|
288
315
|
batch_size: int = 32,
|
|
289
316
|
transform: Callable[[Any], Any] | None = None,
|
|
290
317
|
collate_fn: Callable[[list[dict[str, Any]]], dict[str, Any]] = _custom_collate_fn,
|
|
318
|
+
num_proc: int = 1,
|
|
291
319
|
) -> DataLoader[ImageInput]:
|
|
292
320
|
"""Creates a DataLoader with the image dataset prepared using the explicit transformation.
|
|
293
321
|
|
|
@@ -297,33 +325,41 @@ def _create_image_dataloader(
|
|
|
297
325
|
batch_size: Batch size for the dataloader.
|
|
298
326
|
transform: A transformation function to apply to each image (e.g., converting to tensor).
|
|
299
327
|
collate_fn: A custom collate function to handle batching.
|
|
328
|
+
num_proc: Number of processes to use.
|
|
300
329
|
|
|
301
330
|
Returns:
|
|
302
331
|
A DataLoader with the image dataset.
|
|
303
332
|
"""
|
|
304
333
|
dataset = _prepare_image_dataset(
|
|
305
|
-
dataset,
|
|
334
|
+
dataset,
|
|
335
|
+
image_column_name,
|
|
336
|
+
transform,
|
|
337
|
+
num_proc=num_proc,
|
|
306
338
|
).select_columns(["image"])
|
|
307
339
|
return DataLoader(
|
|
308
340
|
dataset,
|
|
309
341
|
batch_size=batch_size,
|
|
310
342
|
collate_fn=collate_fn,
|
|
311
343
|
shuffle=False,
|
|
344
|
+
num_workers=num_proc if num_proc > 1 else 0,
|
|
312
345
|
)
|
|
313
346
|
|
|
314
347
|
|
|
315
348
|
def _create_text_queries_dataloader(
|
|
316
349
|
dataset: Dataset,
|
|
317
350
|
batch_size: int = 32,
|
|
351
|
+
num_proc: int = 1,
|
|
318
352
|
) -> DataLoader[QueryInput]:
|
|
319
353
|
if not isinstance(dataset["text"][0], list):
|
|
320
354
|
return _create_text_dataloader_for_queries(
|
|
321
355
|
dataset,
|
|
322
356
|
batch_size=batch_size,
|
|
357
|
+
num_proc=num_proc,
|
|
323
358
|
)
|
|
324
359
|
return _create_dataloader_for_queries_conversation(
|
|
325
360
|
dataset,
|
|
326
361
|
batch_size=batch_size,
|
|
362
|
+
num_proc=num_proc,
|
|
327
363
|
)
|
|
328
364
|
|
|
329
365
|
|
|
@@ -332,6 +368,7 @@ def _create_queries_dataloader(
|
|
|
332
368
|
task_metadata: TaskMetadata,
|
|
333
369
|
input_column: str | None = None,
|
|
334
370
|
batch_size: int = 32,
|
|
371
|
+
num_proc: int = 1,
|
|
335
372
|
) -> DataLoader[QueryInput | ImageInput]:
|
|
336
373
|
"""Create a dataloader for queries."""
|
|
337
374
|
queries_type = task_metadata.get_modalities(PromptType.query)
|
|
@@ -339,12 +376,14 @@ def _create_queries_dataloader(
|
|
|
339
376
|
return _create_text_queries_dataloader(
|
|
340
377
|
dataset,
|
|
341
378
|
batch_size=batch_size,
|
|
379
|
+
num_proc=num_proc,
|
|
342
380
|
)
|
|
343
381
|
if "image" in queries_type: # contains image
|
|
344
382
|
return _create_image_dataloader(
|
|
345
383
|
dataset,
|
|
346
384
|
image_column_name="image",
|
|
347
385
|
batch_size=batch_size,
|
|
386
|
+
num_proc=num_proc,
|
|
348
387
|
)
|
|
349
388
|
raise ValueError(f"Can't handle queries type {queries_type}")
|
|
350
389
|
|
|
@@ -354,6 +393,7 @@ def _create_document_dataloader(
|
|
|
354
393
|
task_metadata: TaskMetadata,
|
|
355
394
|
input_column: str | None = None,
|
|
356
395
|
batch_size: int = 32,
|
|
396
|
+
num_proc: int = 1,
|
|
357
397
|
) -> DataLoader[CorpusInput | ImageInput]:
|
|
358
398
|
"""Create a dataloader for documents.
|
|
359
399
|
|
|
@@ -362,6 +402,7 @@ def _create_document_dataloader(
|
|
|
362
402
|
task_metadata: Metadata of the task to determine the document type.
|
|
363
403
|
input_column: The column to use as input. If None, it will use the first column that matches the modality.
|
|
364
404
|
batch_size: Batch size for the dataloader.
|
|
405
|
+
num_proc: Number of processes to use.
|
|
365
406
|
|
|
366
407
|
Returns:
|
|
367
408
|
A dataloader for the documents.
|
|
@@ -371,12 +412,14 @@ def _create_document_dataloader(
|
|
|
371
412
|
return _create_dataloader_for_retrieval_corpus(
|
|
372
413
|
dataset,
|
|
373
414
|
batch_size=batch_size,
|
|
415
|
+
num_proc=num_proc,
|
|
374
416
|
)
|
|
375
417
|
if "image" in document_type: # contains image
|
|
376
418
|
return _create_image_dataloader(
|
|
377
419
|
dataset,
|
|
378
420
|
image_column_name="image",
|
|
379
421
|
batch_size=batch_size,
|
|
422
|
+
num_proc=num_proc,
|
|
380
423
|
)
|
|
381
424
|
raise ValueError(f"Can't handle queries type {document_type}")
|
|
382
425
|
|
|
@@ -387,6 +430,7 @@ def create_dataloader(
|
|
|
387
430
|
prompt_type: PromptType | None = None,
|
|
388
431
|
input_column: str | None = None,
|
|
389
432
|
batch_size: int = 32,
|
|
433
|
+
num_proc: int = 1,
|
|
390
434
|
**kwargs: Any,
|
|
391
435
|
) -> DataLoader[BatchedInput]:
|
|
392
436
|
"""Create a dataloader from a dataset.
|
|
@@ -400,6 +444,7 @@ def create_dataloader(
|
|
|
400
444
|
prompt_type: The type of prompt to create a dataloader for. If None, it will be inferred from the task metadata.
|
|
401
445
|
input_column: The column to use as input. If None, it will use the first column that matches the modality.
|
|
402
446
|
batch_size: The batch size for the dataloader.
|
|
447
|
+
num_proc: The number of processes to use for dataset processing.
|
|
403
448
|
**kwargs: Additional arguments to pass to the dataloader creation functions.
|
|
404
449
|
|
|
405
450
|
Returns:
|
|
@@ -411,6 +456,7 @@ def create_dataloader(
|
|
|
411
456
|
task_metadata,
|
|
412
457
|
batch_size=batch_size,
|
|
413
458
|
input_column=input_column,
|
|
459
|
+
num_proc=num_proc,
|
|
414
460
|
)
|
|
415
461
|
if prompt_type == PromptType.document:
|
|
416
462
|
return _create_document_dataloader(
|
|
@@ -418,6 +464,7 @@ def create_dataloader(
|
|
|
418
464
|
task_metadata,
|
|
419
465
|
input_column=input_column,
|
|
420
466
|
batch_size=batch_size,
|
|
467
|
+
num_proc=num_proc,
|
|
421
468
|
)
|
|
422
469
|
|
|
423
470
|
if "image" in task_metadata.modalities:
|
|
@@ -425,6 +472,7 @@ def create_dataloader(
|
|
|
425
472
|
dataset,
|
|
426
473
|
image_column_name=input_column,
|
|
427
474
|
batch_size=batch_size,
|
|
475
|
+
num_proc=num_proc,
|
|
428
476
|
)
|
|
429
477
|
if "text" in task_metadata.modalities and input_column is not None:
|
|
430
478
|
return _create_dataloader_from_texts(
|
|
@@ -434,4 +482,5 @@ def create_dataloader(
|
|
|
434
482
|
return DataLoader(
|
|
435
483
|
dataset,
|
|
436
484
|
batch_size=batch_size,
|
|
485
|
+
num_workers=num_proc if num_proc > 1 else 0,
|
|
437
486
|
)
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
|
-
from typing import TypedDict
|
|
4
|
+
from typing import TYPE_CHECKING, TypedDict
|
|
3
5
|
|
|
4
|
-
from datasets import Dataset
|
|
5
6
|
from sklearn.metrics.pairwise import (
|
|
6
7
|
paired_cosine_distances,
|
|
7
8
|
paired_euclidean_distances,
|
|
@@ -9,13 +10,17 @@ from sklearn.metrics.pairwise import (
|
|
|
9
10
|
)
|
|
10
11
|
|
|
11
12
|
from mteb._create_dataloaders import create_dataloader
|
|
12
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
13
|
-
from mteb.models import EncoderProtocol
|
|
14
13
|
from mteb.similarity_functions import compute_pairwise_similarity
|
|
15
|
-
from mteb.types import EncodeKwargs, PromptType
|
|
16
14
|
|
|
17
15
|
from .evaluator import Evaluator
|
|
18
16
|
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from datasets import Dataset
|
|
19
|
+
|
|
20
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
21
|
+
from mteb.models import EncoderProtocol
|
|
22
|
+
from mteb.types import EncodeKwargs, PromptType
|
|
23
|
+
|
|
19
24
|
logger = logging.getLogger(__name__)
|
|
20
25
|
|
|
21
26
|
|
|
@@ -61,6 +66,7 @@ class AnySTSEvaluator(Evaluator):
|
|
|
61
66
|
model: EncoderProtocol,
|
|
62
67
|
*,
|
|
63
68
|
encode_kwargs: EncodeKwargs,
|
|
69
|
+
num_proc: int = 1,
|
|
64
70
|
) -> STSEvaluatorScores:
|
|
65
71
|
logger.info("Running semantic similarity - Encoding samples (1/2)")
|
|
66
72
|
embeddings1 = model.encode(
|
|
@@ -68,6 +74,7 @@ class AnySTSEvaluator(Evaluator):
|
|
|
68
74
|
self.dataset,
|
|
69
75
|
self.task_metadata,
|
|
70
76
|
input_column=self.input_columns[0],
|
|
77
|
+
num_proc=num_proc,
|
|
71
78
|
**encode_kwargs,
|
|
72
79
|
),
|
|
73
80
|
task_metadata=self.task_metadata,
|
|
@@ -1,15 +1,21 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
2
5
|
|
|
3
|
-
from datasets import Dataset
|
|
4
6
|
from sklearn import cluster
|
|
5
7
|
|
|
6
8
|
from mteb._create_dataloaders import create_dataloader
|
|
7
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
8
|
-
from mteb.models import EncoderProtocol
|
|
9
|
-
from mteb.types import EncodeKwargs
|
|
10
9
|
|
|
11
10
|
from .evaluator import Evaluator
|
|
12
11
|
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from datasets import Dataset
|
|
14
|
+
|
|
15
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
16
|
+
from mteb.models import EncoderProtocol
|
|
17
|
+
from mteb.types import EncodeKwargs
|
|
18
|
+
|
|
13
19
|
logger = logging.getLogger(__name__)
|
|
14
20
|
|
|
15
21
|
|
|
@@ -39,11 +45,13 @@ class ClusteringEvaluator(Evaluator):
|
|
|
39
45
|
model: EncoderProtocol,
|
|
40
46
|
*,
|
|
41
47
|
encode_kwargs: EncodeKwargs,
|
|
48
|
+
num_proc: int = 1,
|
|
42
49
|
) -> list[int]:
|
|
43
50
|
data_loader = create_dataloader(
|
|
44
51
|
self.dataset,
|
|
45
52
|
self.task_metadata,
|
|
46
53
|
input_column=self.input_column_name,
|
|
54
|
+
num_proc=num_proc,
|
|
47
55
|
**encode_kwargs,
|
|
48
56
|
)
|
|
49
57
|
|
mteb/_evaluators/evaluator.py
CHANGED
|
@@ -1,10 +1,15 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
from abc import ABC, abstractmethod
|
|
2
|
-
from
|
|
3
|
-
from typing import Any
|
|
4
|
+
from typing import TYPE_CHECKING, Any
|
|
4
5
|
|
|
5
6
|
from mteb.abstasks.abstask import _set_seed
|
|
6
|
-
|
|
7
|
-
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from collections.abc import Iterable, Mapping
|
|
10
|
+
|
|
11
|
+
from mteb.models import EncoderProtocol
|
|
12
|
+
from mteb.types import EncodeKwargs
|
|
8
13
|
|
|
9
14
|
|
|
10
15
|
class Evaluator(ABC):
|
|
@@ -19,7 +24,7 @@ class Evaluator(ABC):
|
|
|
19
24
|
|
|
20
25
|
@abstractmethod
|
|
21
26
|
def __call__(
|
|
22
|
-
self, model: EncoderProtocol, *, encode_kwargs: EncodeKwargs
|
|
27
|
+
self, model: EncoderProtocol, *, encode_kwargs: EncodeKwargs, num_proc: int = 1
|
|
23
28
|
) -> Mapping[str, float] | Iterable[Any]:
|
|
24
29
|
"""This is called during training to evaluate the model.
|
|
25
30
|
|
|
@@ -28,5 +33,6 @@ class Evaluator(ABC):
|
|
|
28
33
|
Args:
|
|
29
34
|
model: the model to evaluate
|
|
30
35
|
encode_kwargs: kwargs to pass to the model's encode method
|
|
36
|
+
num_proc: number of processes to use for data loading
|
|
31
37
|
"""
|
|
32
38
|
pass
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
-
from collections.abc import Sequence
|
|
5
4
|
from typing import TYPE_CHECKING, Any
|
|
6
5
|
|
|
7
6
|
import torch
|
|
@@ -14,13 +13,16 @@ from mteb._create_dataloaders import (
|
|
|
14
13
|
)
|
|
15
14
|
from mteb._evaluators.evaluator import Evaluator
|
|
16
15
|
from mteb._requires_package import requires_image_dependencies
|
|
17
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
18
|
-
from mteb.models.models_protocols import EncoderProtocol
|
|
19
|
-
from mteb.types import EncodeKwargs
|
|
20
16
|
|
|
21
17
|
if TYPE_CHECKING:
|
|
18
|
+
from collections.abc import Sequence
|
|
19
|
+
|
|
22
20
|
from PIL.Image import Image
|
|
23
21
|
|
|
22
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
23
|
+
from mteb.models.models_protocols import EncoderProtocol
|
|
24
|
+
from mteb.types import EncodeKwargs
|
|
25
|
+
|
|
24
26
|
|
|
25
27
|
logger = logging.getLogger(__name__)
|
|
26
28
|
|
|
@@ -89,6 +91,7 @@ class ImageTextPairClassificationEvaluator(Evaluator):
|
|
|
89
91
|
model: EncoderProtocol,
|
|
90
92
|
*,
|
|
91
93
|
encode_kwargs: EncodeKwargs,
|
|
94
|
+
num_proc: int = 1,
|
|
92
95
|
) -> list[torch.Tensor]:
|
|
93
96
|
images = []
|
|
94
97
|
if isinstance(self.images_column_names, str):
|
|
@@ -111,6 +114,7 @@ class ImageTextPairClassificationEvaluator(Evaluator):
|
|
|
111
114
|
text_embeddings = model.encode(
|
|
112
115
|
_create_dataloader_from_texts(
|
|
113
116
|
texts,
|
|
117
|
+
num_proc=num_proc,
|
|
114
118
|
**encode_kwargs,
|
|
115
119
|
),
|
|
116
120
|
task_metadata=self.task_metadata,
|
|
@@ -127,10 +131,15 @@ class ImageTextPairClassificationEvaluator(Evaluator):
|
|
|
127
131
|
dim=-1,
|
|
128
132
|
).view(len(self.dataset), self.num_texts_per_sample, -1)
|
|
129
133
|
|
|
134
|
+
def _image_collate_fn(batch):
|
|
135
|
+
"""Collate function for image batches."""
|
|
136
|
+
return {"image": [item["image"] for item in batch]}
|
|
137
|
+
|
|
130
138
|
image_embeddings = model.encode(
|
|
131
139
|
DataLoader(
|
|
132
140
|
CustomImageDataset(images),
|
|
133
|
-
collate_fn=
|
|
141
|
+
collate_fn=_image_collate_fn,
|
|
142
|
+
num_workers=num_proc if num_proc > 1 else 0,
|
|
134
143
|
),
|
|
135
144
|
task_metadata=self.task_metadata,
|
|
136
145
|
hf_subset=self.hf_subset,
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
|
-
from typing import Any, TypedDict
|
|
4
|
+
from typing import TYPE_CHECKING, Any, TypedDict
|
|
3
5
|
|
|
4
6
|
import numpy as np
|
|
5
|
-
from datasets import Dataset
|
|
6
7
|
from sklearn.metrics.pairwise import (
|
|
7
8
|
paired_cosine_distances,
|
|
8
9
|
paired_euclidean_distances,
|
|
@@ -11,10 +12,14 @@ from sklearn.metrics.pairwise import (
|
|
|
11
12
|
|
|
12
13
|
from mteb._create_dataloaders import _create_dataloader_from_texts, create_dataloader
|
|
13
14
|
from mteb._evaluators.evaluator import Evaluator
|
|
14
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
15
|
-
from mteb.models import EncoderProtocol
|
|
16
15
|
from mteb.similarity_functions import compute_pairwise_similarity
|
|
17
|
-
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from datasets import Dataset
|
|
19
|
+
|
|
20
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
21
|
+
from mteb.models import EncoderProtocol
|
|
22
|
+
from mteb.types import EncodeKwargs, PromptType
|
|
18
23
|
|
|
19
24
|
logger = logging.getLogger(__name__)
|
|
20
25
|
|
|
@@ -86,6 +91,7 @@ class PairClassificationEvaluator(Evaluator):
|
|
|
86
91
|
self,
|
|
87
92
|
model: EncoderProtocol,
|
|
88
93
|
encode_kwargs: EncodeKwargs,
|
|
94
|
+
num_proc: int = 1,
|
|
89
95
|
) -> PairClassificationDistances:
|
|
90
96
|
logger.info("Running pair classification - Encoding samples (1/2)")
|
|
91
97
|
embeddings1 = model.encode(
|
|
@@ -93,6 +99,7 @@ class PairClassificationEvaluator(Evaluator):
|
|
|
93
99
|
self.dataset,
|
|
94
100
|
task_metadata=self.task_metadata,
|
|
95
101
|
input_column=self.input1_column_name,
|
|
102
|
+
num_proc=num_proc,
|
|
96
103
|
**encode_kwargs,
|
|
97
104
|
),
|
|
98
105
|
task_metadata=self.task_metadata,
|
|
@@ -107,6 +114,7 @@ class PairClassificationEvaluator(Evaluator):
|
|
|
107
114
|
self.dataset,
|
|
108
115
|
task_metadata=self.task_metadata,
|
|
109
116
|
input_column=self.input2_column_name,
|
|
117
|
+
num_proc=num_proc,
|
|
110
118
|
**encode_kwargs,
|
|
111
119
|
),
|
|
112
120
|
task_metadata=self.task_metadata,
|
|
@@ -1,23 +1,29 @@
|
|
|
1
|
-
import
|
|
2
|
-
from collections.abc import Sequence
|
|
1
|
+
from __future__ import annotations
|
|
3
2
|
|
|
4
|
-
|
|
5
|
-
from
|
|
6
|
-
from mteb.types import (
|
|
7
|
-
CorpusDatasetType,
|
|
8
|
-
EncodeKwargs,
|
|
9
|
-
QueryDatasetType,
|
|
10
|
-
RelevantDocumentsType,
|
|
11
|
-
RetrievalEvaluationResult,
|
|
12
|
-
RetrievalOutputType,
|
|
13
|
-
TopRankedDocumentsType,
|
|
14
|
-
)
|
|
3
|
+
import logging
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
15
5
|
|
|
16
6
|
from .evaluator import Evaluator
|
|
17
7
|
from .retrieval_metrics import (
|
|
18
8
|
calculate_retrieval_scores,
|
|
19
9
|
)
|
|
20
10
|
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from collections.abc import Sequence
|
|
13
|
+
|
|
14
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
15
|
+
from mteb.models import SearchProtocol
|
|
16
|
+
from mteb.types import (
|
|
17
|
+
CorpusDatasetType,
|
|
18
|
+
EncodeKwargs,
|
|
19
|
+
QueryDatasetType,
|
|
20
|
+
RelevantDocumentsType,
|
|
21
|
+
RetrievalEvaluationResult,
|
|
22
|
+
RetrievalOutputType,
|
|
23
|
+
TopRankedDocumentsType,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
|
|
21
27
|
logger = logging.getLogger(__name__)
|
|
22
28
|
|
|
23
29
|
|
|
@@ -49,6 +55,7 @@ class RetrievalEvaluator(Evaluator):
|
|
|
49
55
|
self,
|
|
50
56
|
search_model: SearchProtocol,
|
|
51
57
|
encode_kwargs: EncodeKwargs,
|
|
58
|
+
num_proc: int = 1,
|
|
52
59
|
) -> RetrievalOutputType:
|
|
53
60
|
logger.info("Running retrieval task - Indexing corpus...")
|
|
54
61
|
search_model.index(
|
|
@@ -57,6 +64,7 @@ class RetrievalEvaluator(Evaluator):
|
|
|
57
64
|
hf_split=self.hf_split,
|
|
58
65
|
hf_subset=self.hf_subset,
|
|
59
66
|
encode_kwargs=encode_kwargs,
|
|
67
|
+
num_proc=num_proc,
|
|
60
68
|
)
|
|
61
69
|
logger.info("Running retrieval task - Searching queries...")
|
|
62
70
|
return search_model.search(
|
|
@@ -67,6 +75,7 @@ class RetrievalEvaluator(Evaluator):
|
|
|
67
75
|
hf_subset=self.hf_subset,
|
|
68
76
|
encode_kwargs=encode_kwargs,
|
|
69
77
|
top_ranked=self.top_ranked,
|
|
78
|
+
num_proc=num_proc,
|
|
70
79
|
)
|
|
71
80
|
|
|
72
81
|
def evaluate(
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
4
|
from collections import defaultdict
|
|
3
|
-
from
|
|
4
|
-
from typing import Any
|
|
5
|
+
from typing import TYPE_CHECKING, Any
|
|
5
6
|
|
|
6
7
|
import numpy as np
|
|
7
8
|
import pandas as pd
|
|
@@ -9,7 +10,12 @@ import pytrec_eval
|
|
|
9
10
|
from packaging.version import Version
|
|
10
11
|
from sklearn.metrics import auc
|
|
11
12
|
|
|
12
|
-
from mteb.types import
|
|
13
|
+
from mteb.types import RetrievalEvaluationResult
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from collections.abc import Mapping
|
|
17
|
+
|
|
18
|
+
from mteb.types import RelevantDocumentsType
|
|
13
19
|
|
|
14
20
|
logger = logging.getLogger(__name__)
|
|
15
21
|
|