mteb 2.7.2__py3-none-any.whl → 2.7.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/_create_dataloaders.py +63 -14
- mteb/_evaluators/any_sts_evaluator.py +12 -5
- mteb/_evaluators/clustering_evaluator.py +12 -4
- mteb/_evaluators/evaluator.py +11 -5
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +14 -5
- mteb/_evaluators/pair_classification_evaluator.py +13 -5
- mteb/_evaluators/retrieval_evaluator.py +22 -13
- mteb/_evaluators/retrieval_metrics.py +9 -3
- mteb/_evaluators/sklearn_evaluator.py +20 -11
- mteb/_evaluators/text/bitext_mining_evaluator.py +10 -3
- mteb/_evaluators/text/summarization_evaluator.py +10 -4
- mteb/_evaluators/zeroshot_classification_evaluator.py +12 -3
- mteb/_helpful_enum.py +5 -1
- mteb/abstasks/_data_filter/filters.py +8 -2
- mteb/abstasks/_data_filter/task_pipelines.py +7 -2
- mteb/abstasks/_statistics_calculation.py +6 -4
- mteb/abstasks/abstask.py +48 -21
- mteb/abstasks/aggregate_task_metadata.py +20 -9
- mteb/abstasks/aggregated_task.py +15 -8
- mteb/abstasks/classification.py +25 -9
- mteb/abstasks/clustering.py +23 -10
- mteb/abstasks/clustering_legacy.py +22 -8
- mteb/abstasks/image/image_text_pair_classification.py +23 -9
- mteb/abstasks/multilabel_classification.py +13 -5
- mteb/abstasks/pair_classification.py +27 -11
- mteb/abstasks/regression.py +14 -6
- mteb/abstasks/retrieval.py +56 -30
- mteb/abstasks/retrieval_dataset_loaders.py +48 -37
- mteb/abstasks/sts.py +29 -13
- mteb/abstasks/task_metadata.py +17 -8
- mteb/abstasks/text/bitext_mining.py +23 -12
- mteb/abstasks/text/reranking.py +2 -2
- mteb/abstasks/text/summarization.py +19 -8
- mteb/abstasks/zeroshot_classification.py +23 -9
- mteb/benchmarks/_create_table.py +13 -7
- mteb/benchmarks/benchmark.py +11 -1
- mteb/benchmarks/benchmarks/__init__.py +2 -0
- mteb/benchmarks/benchmarks/benchmarks.py +41 -2
- mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
- mteb/cache.py +10 -5
- mteb/cli/_display_tasks.py +9 -3
- mteb/cli/build_cli.py +5 -2
- mteb/cli/generate_model_card.py +9 -2
- mteb/deprecated_evaluator.py +16 -12
- mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
- mteb/evaluate.py +33 -20
- mteb/filter_tasks.py +12 -7
- mteb/get_tasks.py +9 -4
- mteb/languages/language_scripts.py +8 -3
- mteb/leaderboard/app.py +11 -4
- mteb/leaderboard/table.py +7 -2
- mteb/load_results.py +9 -3
- mteb/models/abs_encoder.py +22 -12
- mteb/models/cache_wrappers/cache_backend_protocol.py +5 -3
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +8 -4
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +8 -3
- mteb/models/cache_wrappers/cache_wrapper.py +14 -9
- mteb/models/get_model_meta.py +32 -6
- mteb/models/instruct_wrapper.py +13 -5
- mteb/models/model_implementations/align_models.py +10 -4
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +2 -0
- mteb/models/model_implementations/ara_models.py +1 -0
- mteb/models/model_implementations/arctic_models.py +8 -0
- mteb/models/model_implementations/b1ade_models.py +1 -0
- mteb/models/model_implementations/bedrock_models.py +20 -6
- mteb/models/model_implementations/bge_models.py +40 -1
- mteb/models/model_implementations/bica_model.py +1 -0
- mteb/models/model_implementations/blip2_models.py +11 -4
- mteb/models/model_implementations/blip_models.py +17 -4
- mteb/models/model_implementations/bm25.py +24 -14
- mteb/models/model_implementations/bmretriever_models.py +10 -2
- mteb/models/model_implementations/cadet_models.py +1 -0
- mteb/models/model_implementations/cde_models.py +11 -5
- mteb/models/model_implementations/clip_models.py +12 -4
- mteb/models/model_implementations/clips_models.py +3 -0
- mteb/models/model_implementations/codefuse_models.py +5 -0
- mteb/models/model_implementations/codesage_models.py +3 -0
- mteb/models/model_implementations/cohere_models.py +14 -4
- mteb/models/model_implementations/cohere_v.py +14 -4
- mteb/models/model_implementations/colpali_models.py +7 -3
- mteb/models/model_implementations/colqwen_models.py +17 -31
- mteb/models/model_implementations/colsmol_models.py +3 -1
- mteb/models/model_implementations/conan_models.py +11 -4
- mteb/models/model_implementations/dino_models.py +28 -4
- mteb/models/model_implementations/e5_instruct.py +4 -0
- mteb/models/model_implementations/e5_models.py +9 -0
- mteb/models/model_implementations/e5_v.py +10 -4
- mteb/models/model_implementations/eagerworks_models.py +11 -4
- mteb/models/model_implementations/emillykkejensen_models.py +3 -0
- mteb/models/model_implementations/en_code_retriever.py +1 -0
- mteb/models/model_implementations/euler_models.py +1 -0
- mteb/models/model_implementations/evaclip_models.py +13 -4
- mteb/models/model_implementations/fa_models.py +9 -0
- mteb/models/model_implementations/facebookai.py +2 -0
- mteb/models/model_implementations/geogpt_models.py +1 -0
- mteb/models/model_implementations/gme_v_models.py +7 -3
- mteb/models/model_implementations/google_models.py +15 -4
- mteb/models/model_implementations/granite_vision_embedding_models.py +7 -5
- mteb/models/model_implementations/gritlm_models.py +3 -0
- mteb/models/model_implementations/gte_models.py +9 -0
- mteb/models/model_implementations/hinvec_models.py +6 -1
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +6 -0
- mteb/models/model_implementations/inf_models.py +2 -0
- mteb/models/model_implementations/jasper_models.py +14 -5
- mteb/models/model_implementations/jina_clip.py +10 -4
- mteb/models/model_implementations/jina_models.py +17 -5
- mteb/models/model_implementations/kalm_models.py +24 -12
- mteb/models/model_implementations/kblab.py +1 -0
- mteb/models/model_implementations/kennethenevoldsen_models.py +2 -0
- mteb/models/model_implementations/kfst.py +1 -0
- mteb/models/model_implementations/kowshik24_models.py +1 -0
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +1 -0
- mteb/models/model_implementations/linq_models.py +7 -1
- mteb/models/model_implementations/listconranker.py +10 -4
- mteb/models/model_implementations/llm2clip_models.py +12 -4
- mteb/models/model_implementations/llm2vec_models.py +20 -6
- mteb/models/model_implementations/mcinext_models.py +8 -2
- mteb/models/model_implementations/mdbr_models.py +2 -0
- mteb/models/model_implementations/misc_models.py +63 -0
- mteb/models/model_implementations/mixedbread_ai_models.py +3 -0
- mteb/models/model_implementations/mme5_models.py +2 -1
- mteb/models/model_implementations/moco_models.py +11 -4
- mteb/models/model_implementations/mod_models.py +2 -1
- mteb/models/model_implementations/model2vec_models.py +23 -4
- mteb/models/model_implementations/moka_models.py +3 -0
- mteb/models/model_implementations/nbailab.py +3 -0
- mteb/models/model_implementations/no_instruct_sentence_models.py +13 -5
- mteb/models/model_implementations/nomic_models.py +17 -4
- mteb/models/model_implementations/nomic_models_vision.py +5 -3
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +9 -3
- mteb/models/model_implementations/nvidia_models.py +15 -4
- mteb/models/model_implementations/octen_models.py +3 -1
- mteb/models/model_implementations/openai_models.py +14 -4
- mteb/models/model_implementations/openclip_models.py +17 -4
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +15 -4
- mteb/models/model_implementations/ops_moa_models.py +9 -2
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -0
- mteb/models/model_implementations/pawan_models.py +1 -0
- mteb/models/model_implementations/piccolo_models.py +2 -0
- mteb/models/model_implementations/promptriever_models.py +16 -6
- mteb/models/model_implementations/pylate_models.py +32 -13
- mteb/models/model_implementations/qodo_models.py +2 -0
- mteb/models/model_implementations/qtack_models.py +1 -0
- mteb/models/model_implementations/qwen3_models.py +11 -1
- mteb/models/model_implementations/qzhou_models.py +2 -0
- mteb/models/model_implementations/random_baseline.py +4 -3
- mteb/models/model_implementations/rasgaard_models.py +1 -0
- mteb/models/model_implementations/reasonir_model.py +65 -0
- mteb/models/model_implementations/repllama_models.py +15 -6
- mteb/models/model_implementations/rerankers_custom.py +13 -4
- mteb/models/model_implementations/rerankers_monot5_based.py +24 -4
- mteb/models/model_implementations/richinfoai_models.py +1 -0
- mteb/models/model_implementations/ru_sentence_models.py +20 -0
- mteb/models/model_implementations/ruri_models.py +10 -0
- mteb/models/model_implementations/salesforce_models.py +10 -1
- mteb/models/model_implementations/samilpwc_models.py +1 -0
- mteb/models/model_implementations/sarashina_embedding_models.py +2 -0
- mteb/models/model_implementations/searchmap_models.py +1 -0
- mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +6 -2
- mteb/models/model_implementations/seed_models.py +2 -1
- mteb/models/model_implementations/sentence_transformers_models.py +18 -0
- mteb/models/model_implementations/shuu_model.py +1 -0
- mteb/models/model_implementations/siglip_models.py +19 -4
- mteb/models/model_implementations/slm_models.py +7 -4
- mteb/models/model_implementations/sonar_models.py +2 -1
- mteb/models/model_implementations/spartan8806_atles_champion.py +1 -0
- mteb/models/model_implementations/stella_models.py +6 -0
- mteb/models/model_implementations/tarka_models.py +2 -0
- mteb/models/model_implementations/text2vec_models.py +3 -0
- mteb/models/model_implementations/ua_sentence_models.py +1 -0
- mteb/models/model_implementations/uae_models.py +10 -4
- mteb/models/model_implementations/vdr_models.py +8 -1
- mteb/models/model_implementations/vi_vn_models.py +6 -0
- mteb/models/model_implementations/vista_models.py +11 -4
- mteb/models/model_implementations/vlm2vec_models.py +11 -4
- mteb/models/model_implementations/voyage_models.py +52 -4
- mteb/models/model_implementations/voyage_v.py +11 -6
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +1 -0
- mteb/models/model_implementations/yuan_models.py +1 -0
- mteb/models/model_implementations/yuan_models_en.py +2 -1
- mteb/models/model_meta.py +47 -9
- mteb/models/models_protocols.py +23 -18
- mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +12 -4
- mteb/models/search_wrappers.py +31 -12
- mteb/models/sentence_transformer_wrapper.py +4 -3
- mteb/models/vllm_wrapper.py +8 -6
- mteb/results/benchmark_results.py +22 -17
- mteb/results/model_result.py +21 -15
- mteb/results/task_result.py +32 -16
- mteb/similarity_functions.py +8 -2
- mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
- mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
- mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
- mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
- mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
- mteb/tasks/classification/dan/dk_hate_classification.py +1 -1
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
- mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
- mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
- mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
- mteb/tasks/classification/est/estonian_valence.py +1 -1
- mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
- mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
- mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
- mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
- mteb/tasks/classification/fra/french_book_reviews.py +2 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
- mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
- mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
- mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
- mteb/tasks/classification/jpn/wrime_classification.py +1 -1
- mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
- mteb/tasks/classification/kor/klue_tc.py +2 -2
- mteb/tasks/classification/kor/kor_fin.py +1 -1
- mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
- mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
- mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
- mteb/tasks/classification/multilingual/scala_classification.py +1 -1
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
- mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
- mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
- mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
- mteb/tasks/classification/ory/odia_news_classification.py +2 -2
- mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
- mteb/tasks/classification/ron/moroco.py +1 -1
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
- mteb/tasks/classification/rus/georeview_classification.py +1 -1
- mteb/tasks/classification/rus/headline_classification.py +2 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
- mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
- mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
- mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
- mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
- mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
- mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
- mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
- mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
- mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
- mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
- mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nob/snl_clustering.py +8 -3
- mteb/tasks/clustering/nob/vg_clustering.py +8 -3
- mteb/tasks/clustering/pol/polish_clustering.py +3 -3
- mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
- mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
- mteb/tasks/clustering/zho/cmteb_clustering.py +4 -4
- mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
- mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
- mteb/tasks/multichoice/eng/cv_bench.py +4 -4
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
- mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
- mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
- mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
- mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
- mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
- mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
- mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
- mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
- mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
- mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
- mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
- mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
- mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
- mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
- mteb/tasks/pair_classification/rus/terra.py +2 -2
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
- mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
- mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_rag.py +4 -4
- mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
- mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
- mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
- mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
- mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
- mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +1 -1
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +1 -1
- mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
- mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
- mteb/tasks/retrieval/eng/__init__.py +42 -0
- mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
- mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
- mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/ml_questions.py +1 -1
- mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
- mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
- mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
- mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
- mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
- mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
- mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nob/norquad.py +1 -1
- mteb/tasks/retrieval/nob/snl_retrieval.py +1 -1
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
- mteb/tasks/sts/fao/faroese_sts.py +1 -1
- mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
- mteb/tasks/sts/kor/klue_sts.py +1 -1
- mteb/tasks/sts/por/sick_br_sts.py +1 -1
- mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
- mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
- mteb/types/_encoder_io.py +1 -1
- mteb/types/statistics.py +9 -2
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/METADATA +1 -1
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/RECORD +486 -465
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.7.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
|
@@ -1,18 +1,22 @@
|
|
|
1
|
-
import
|
|
2
|
-
from typing import Any, Protocol, cast
|
|
1
|
+
from __future__ import annotations
|
|
3
2
|
|
|
4
|
-
import
|
|
5
|
-
from
|
|
6
|
-
from torch.utils.data import DataLoader
|
|
7
|
-
from typing_extensions import Self
|
|
3
|
+
import logging
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Protocol, cast
|
|
8
5
|
|
|
9
6
|
from mteb._create_dataloaders import create_dataloader
|
|
10
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
11
|
-
from mteb.models import EncoderProtocol
|
|
12
|
-
from mteb.types import Array, BatchedInput, EncodeKwargs
|
|
13
7
|
|
|
14
8
|
from .evaluator import Evaluator
|
|
15
9
|
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
import numpy as np
|
|
12
|
+
from datasets import Dataset
|
|
13
|
+
from torch.utils.data import DataLoader
|
|
14
|
+
from typing_extensions import Self
|
|
15
|
+
|
|
16
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
17
|
+
from mteb.models import EncoderProtocol
|
|
18
|
+
from mteb.types import Array, BatchedInput, EncodeKwargs
|
|
19
|
+
|
|
16
20
|
logger = logging.getLogger(__name__)
|
|
17
21
|
|
|
18
22
|
|
|
@@ -50,18 +54,20 @@ class SklearnEvaluator(Evaluator):
|
|
|
50
54
|
self.evaluator_model = evaluator_model
|
|
51
55
|
|
|
52
56
|
def create_dataloaders(
|
|
53
|
-
self, encode_kwargs: EncodeKwargs
|
|
57
|
+
self, encode_kwargs: EncodeKwargs, num_proc: int
|
|
54
58
|
) -> tuple[DataLoader[BatchedInput], DataLoader[BatchedInput]]:
|
|
55
59
|
dataloader_train = create_dataloader(
|
|
56
60
|
self.train_dataset,
|
|
57
61
|
self.task_metadata,
|
|
58
62
|
input_column=self.values_column_name,
|
|
63
|
+
num_proc=num_proc,
|
|
59
64
|
**encode_kwargs,
|
|
60
65
|
)
|
|
61
66
|
dataloader_test = create_dataloader(
|
|
62
67
|
self.eval_dataset,
|
|
63
68
|
self.task_metadata,
|
|
64
69
|
input_column=self.values_column_name,
|
|
70
|
+
num_proc=num_proc,
|
|
65
71
|
**encode_kwargs,
|
|
66
72
|
)
|
|
67
73
|
return dataloader_train, dataloader_test
|
|
@@ -72,6 +78,7 @@ class SklearnEvaluator(Evaluator):
|
|
|
72
78
|
*,
|
|
73
79
|
encode_kwargs: EncodeKwargs,
|
|
74
80
|
test_cache: Array | None = None,
|
|
81
|
+
num_proc: int = 1,
|
|
75
82
|
) -> tuple[np.ndarray, Array]:
|
|
76
83
|
"""Classification evaluation by training a sklearn classifier on the embeddings of the training set and evaluating on the embeddings of the test set.
|
|
77
84
|
|
|
@@ -79,6 +86,7 @@ class SklearnEvaluator(Evaluator):
|
|
|
79
86
|
model: Encoder
|
|
80
87
|
encode_kwargs: encode kwargs
|
|
81
88
|
test_cache: embeddings of the test set, if already computed
|
|
89
|
+
num_proc: number of processes to use
|
|
82
90
|
|
|
83
91
|
Returns:
|
|
84
92
|
Tuple of test predictions and embeddings
|
|
@@ -86,6 +94,7 @@ class SklearnEvaluator(Evaluator):
|
|
|
86
94
|
"""
|
|
87
95
|
dataloader_train, dataloader_test = self.create_dataloaders(
|
|
88
96
|
encode_kwargs=encode_kwargs,
|
|
97
|
+
num_proc=num_proc,
|
|
89
98
|
)
|
|
90
99
|
|
|
91
100
|
logger.info("Running - Encoding samples...")
|
|
@@ -104,7 +113,7 @@ class SklearnEvaluator(Evaluator):
|
|
|
104
113
|
hf_subset=self.hf_subset,
|
|
105
114
|
**encode_kwargs,
|
|
106
115
|
)
|
|
107
|
-
test_cache = cast(Array, test_cache)
|
|
116
|
+
test_cache = cast("Array", test_cache)
|
|
108
117
|
|
|
109
118
|
logger.info("Running - Fitting classifier...")
|
|
110
119
|
y_train = self.train_dataset[self.label_column_name]
|
|
@@ -1,4 +1,7 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
2
5
|
|
|
3
6
|
import torch
|
|
4
7
|
from datasets import Dataset
|
|
@@ -6,9 +9,11 @@ from tqdm.auto import tqdm
|
|
|
6
9
|
|
|
7
10
|
from mteb._create_dataloaders import _create_dataloader_from_texts
|
|
8
11
|
from mteb._evaluators.evaluator import Evaluator
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
from mteb.
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
15
|
+
from mteb.models import EncoderProtocol
|
|
16
|
+
from mteb.types import Array, EncodeKwargs
|
|
12
17
|
|
|
13
18
|
logger = logging.getLogger(__name__)
|
|
14
19
|
|
|
@@ -36,6 +41,7 @@ class BitextMiningEvaluator(Evaluator):
|
|
|
36
41
|
model: EncoderProtocol,
|
|
37
42
|
*,
|
|
38
43
|
encode_kwargs: EncodeKwargs,
|
|
44
|
+
num_proc: int = 1,
|
|
39
45
|
) -> dict[str, list[dict[str, float]]]:
|
|
40
46
|
pair_elements = {p for pair in self.pairs for p in pair}
|
|
41
47
|
if isinstance(self.sentences, Dataset):
|
|
@@ -50,6 +56,7 @@ class BitextMiningEvaluator(Evaluator):
|
|
|
50
56
|
for sub in tqdm(subsets):
|
|
51
57
|
dataloader = _create_dataloader_from_texts(
|
|
52
58
|
self.sentences[sub],
|
|
59
|
+
num_proc=num_proc,
|
|
53
60
|
**encode_kwargs,
|
|
54
61
|
)
|
|
55
62
|
embeddings[sub] = model.encode(
|
|
@@ -1,6 +1,8 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
4
|
import sys
|
|
3
|
-
from typing import TypedDict
|
|
5
|
+
from typing import TYPE_CHECKING, TypedDict
|
|
4
6
|
|
|
5
7
|
import numpy as np
|
|
6
8
|
import torch
|
|
@@ -9,10 +11,12 @@ from tqdm.auto import tqdm
|
|
|
9
11
|
|
|
10
12
|
from mteb._create_dataloaders import _create_dataloader_from_texts
|
|
11
13
|
from mteb._evaluators.evaluator import Evaluator
|
|
12
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
13
|
-
from mteb.models import EncoderProtocol
|
|
14
14
|
from mteb.similarity_functions import cos_sim, dot_score
|
|
15
|
-
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
18
|
+
from mteb.models import EncoderProtocol
|
|
19
|
+
from mteb.types import EncodeKwargs
|
|
16
20
|
|
|
17
21
|
# if later than python 3.13 use typing module
|
|
18
22
|
if sys.version_info >= (3, 13):
|
|
@@ -96,6 +100,7 @@ class SummarizationEvaluator(Evaluator):
|
|
|
96
100
|
model: EncoderProtocol,
|
|
97
101
|
*,
|
|
98
102
|
encode_kwargs: EncodeKwargs,
|
|
103
|
+
num_proc: int = 1,
|
|
99
104
|
) -> SummarizationDistances:
|
|
100
105
|
# Get the human & machine summaries for the text in one go for all
|
|
101
106
|
human_lens = [len(human_summaries) for human_summaries in self.human_summaries]
|
|
@@ -111,6 +116,7 @@ class SummarizationEvaluator(Evaluator):
|
|
|
111
116
|
for human_summaries in self.human_summaries
|
|
112
117
|
for summary in human_summaries
|
|
113
118
|
],
|
|
119
|
+
num_proc=num_proc,
|
|
114
120
|
**encode_kwargs,
|
|
115
121
|
),
|
|
116
122
|
task_metadata=self.task_metadata,
|
|
@@ -1,4 +1,7 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
2
5
|
|
|
3
6
|
from datasets import Dataset
|
|
4
7
|
|
|
@@ -6,13 +9,17 @@ from mteb._create_dataloaders import (
|
|
|
6
9
|
_create_dataloader_from_texts,
|
|
7
10
|
create_dataloader,
|
|
8
11
|
)
|
|
9
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
10
|
-
from mteb.models import EncoderProtocol
|
|
11
12
|
from mteb.similarity_functions import similarity
|
|
12
|
-
from mteb.types import Array, EncodeKwargs
|
|
13
13
|
|
|
14
14
|
from .evaluator import Evaluator
|
|
15
15
|
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from datasets import Dataset
|
|
18
|
+
|
|
19
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
20
|
+
from mteb.models import EncoderProtocol
|
|
21
|
+
from mteb.types import Array, EncodeKwargs
|
|
22
|
+
|
|
16
23
|
logger = logging.getLogger(__name__)
|
|
17
24
|
|
|
18
25
|
|
|
@@ -41,11 +48,13 @@ class ZeroShotClassificationEvaluator(Evaluator):
|
|
|
41
48
|
model: EncoderProtocol,
|
|
42
49
|
*,
|
|
43
50
|
encode_kwargs: EncodeKwargs,
|
|
51
|
+
num_proc: int = 1,
|
|
44
52
|
) -> Array:
|
|
45
53
|
dataloader = create_dataloader(
|
|
46
54
|
self.dataset,
|
|
47
55
|
input_column=self.input_column_name,
|
|
48
56
|
task_metadata=self.task_metadata,
|
|
57
|
+
num_proc=num_proc,
|
|
49
58
|
**encode_kwargs,
|
|
50
59
|
)
|
|
51
60
|
|
mteb/_helpful_enum.py
CHANGED
|
@@ -1,12 +1,18 @@
|
|
|
1
1
|
"""Simplified version of https://gist.github.com/AlexeyVatolin/ea3adc21aa7a767603ff393b22085adc from https://github.com/embeddings-benchmark/mteb/pull/2900"""
|
|
2
2
|
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
3
5
|
import logging
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
4
7
|
|
|
5
8
|
import datasets
|
|
6
9
|
import pandas as pd
|
|
7
|
-
from datasets import
|
|
10
|
+
from datasets import DatasetDict
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from datasets import Dataset
|
|
8
14
|
|
|
9
|
-
from mteb import TaskMetadata
|
|
15
|
+
from mteb import TaskMetadata
|
|
10
16
|
|
|
11
17
|
logger = logging.getLogger(__name__)
|
|
12
18
|
|
|
@@ -1,9 +1,10 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
2
5
|
|
|
3
6
|
from datasets import DatasetDict
|
|
4
7
|
|
|
5
|
-
from mteb import TaskMetadata
|
|
6
|
-
from mteb.abstasks import AbsTaskClassification
|
|
7
8
|
from mteb.abstasks._data_filter.filters import (
|
|
8
9
|
deduplicate,
|
|
9
10
|
filter_empty,
|
|
@@ -13,6 +14,10 @@ from mteb.abstasks._data_filter.filters import (
|
|
|
13
14
|
split_train_test,
|
|
14
15
|
)
|
|
15
16
|
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from mteb import TaskMetadata
|
|
19
|
+
from mteb.abstasks import AbsTaskClassification
|
|
20
|
+
|
|
16
21
|
logger = logging.getLogger(__name__)
|
|
17
22
|
|
|
18
23
|
|
|
@@ -2,10 +2,8 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import hashlib
|
|
4
4
|
from collections import Counter
|
|
5
|
-
from collections.abc import Mapping
|
|
6
5
|
from typing import TYPE_CHECKING, cast
|
|
7
6
|
|
|
8
|
-
from mteb.types import TopRankedDocumentsType
|
|
9
7
|
from mteb.types.statistics import (
|
|
10
8
|
ImageStatistics,
|
|
11
9
|
LabelStatistics,
|
|
@@ -16,8 +14,12 @@ from mteb.types.statistics import (
|
|
|
16
14
|
)
|
|
17
15
|
|
|
18
16
|
if TYPE_CHECKING:
|
|
17
|
+
from collections.abc import Mapping
|
|
18
|
+
|
|
19
19
|
from PIL import Image
|
|
20
20
|
|
|
21
|
+
from mteb.types import TopRankedDocumentsType
|
|
22
|
+
|
|
21
23
|
|
|
22
24
|
def calculate_text_statistics(texts: list[str]) -> TextStatistics:
|
|
23
25
|
"""Calculate descriptive statistics for a list of texts.
|
|
@@ -87,13 +89,13 @@ def calculate_label_statistics(labels: list[int | list[int]]) -> LabelStatistics
|
|
|
87
89
|
|
|
88
90
|
if not isinstance(labels[0], list):
|
|
89
91
|
# single label classification
|
|
90
|
-
single_label = cast(list[int], labels)
|
|
92
|
+
single_label = cast("list[int]", labels)
|
|
91
93
|
label_len = [1] * len(single_label)
|
|
92
94
|
total_label_len = len(single_label)
|
|
93
95
|
total_labels.extend(single_label)
|
|
94
96
|
elif isinstance(labels[0], list):
|
|
95
97
|
# multilabel classification
|
|
96
|
-
multilabel_labels = cast(list[list[int]], labels)
|
|
98
|
+
multilabel_labels = cast("list[list[int]]", labels)
|
|
97
99
|
label_len = [len(l) for l in multilabel_labels]
|
|
98
100
|
total_label_len = sum(label_len)
|
|
99
101
|
for l in multilabel_labels:
|
mteb/abstasks/abstask.py
CHANGED
|
@@ -1,30 +1,38 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import json
|
|
2
4
|
import logging
|
|
3
5
|
import warnings
|
|
4
6
|
from abc import ABC, abstractmethod
|
|
5
|
-
from collections.abc import
|
|
7
|
+
from collections.abc import Sequence
|
|
6
8
|
from copy import copy
|
|
7
9
|
from pathlib import Path
|
|
8
|
-
from typing import Any, Literal, cast
|
|
10
|
+
from typing import TYPE_CHECKING, Any, Literal, cast
|
|
9
11
|
|
|
10
12
|
import numpy as np
|
|
11
13
|
from datasets import ClassLabel, Dataset, DatasetDict, load_dataset
|
|
12
14
|
from sklearn.preprocessing import MultiLabelBinarizer
|
|
13
15
|
from tqdm.auto import tqdm
|
|
14
|
-
from typing_extensions import Self
|
|
15
16
|
|
|
16
17
|
from mteb._set_seed import _set_seed
|
|
17
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
18
18
|
from mteb.languages import LanguageScripts
|
|
19
19
|
from mteb.models import (
|
|
20
20
|
CrossEncoderProtocol,
|
|
21
21
|
EncoderProtocol,
|
|
22
|
-
MTEBModels,
|
|
23
22
|
SearchProtocol,
|
|
24
23
|
)
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
from
|
|
24
|
+
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
from collections.abc import Mapping
|
|
27
|
+
|
|
28
|
+
from typing_extensions import Self
|
|
29
|
+
|
|
30
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
31
|
+
from mteb.models import (
|
|
32
|
+
MTEBModels,
|
|
33
|
+
)
|
|
34
|
+
from mteb.types import EncodeKwargs, HFSubset, Modalities, ScoresDict
|
|
35
|
+
from mteb.types.statistics import DescriptiveStatistics, SplitDescriptiveStatistics
|
|
28
36
|
|
|
29
37
|
logger = logging.getLogger(__name__)
|
|
30
38
|
|
|
@@ -108,11 +116,14 @@ class AbsTask(ABC):
|
|
|
108
116
|
logger.warning(msg)
|
|
109
117
|
warnings.warn(msg)
|
|
110
118
|
|
|
111
|
-
def dataset_transform(self):
|
|
119
|
+
def dataset_transform(self, num_proc: int = 1):
|
|
112
120
|
"""A transform operations applied to the dataset after loading.
|
|
113
121
|
|
|
114
122
|
This method is useful when the dataset from Huggingface is not in an `mteb` compatible format.
|
|
115
123
|
Override this method if your dataset requires additional transformation.
|
|
124
|
+
|
|
125
|
+
Args:
|
|
126
|
+
num_proc: Number of processes to use for the transformation.
|
|
116
127
|
"""
|
|
117
128
|
pass
|
|
118
129
|
|
|
@@ -124,6 +135,7 @@ class AbsTask(ABC):
|
|
|
124
135
|
*,
|
|
125
136
|
encode_kwargs: EncodeKwargs,
|
|
126
137
|
prediction_folder: Path | None = None,
|
|
138
|
+
num_proc: int = 1,
|
|
127
139
|
**kwargs: Any,
|
|
128
140
|
) -> Mapping[HFSubset, ScoresDict]:
|
|
129
141
|
"""Evaluates an MTEB compatible model on the task.
|
|
@@ -134,6 +146,7 @@ class AbsTask(ABC):
|
|
|
134
146
|
subsets_to_run: List of huggingface subsets (HFSubsets) to evaluate. If None, all subsets are evaluated.
|
|
135
147
|
encode_kwargs: Additional keyword arguments that are passed to the model's `encode` method.
|
|
136
148
|
prediction_folder: Folder to save model predictions
|
|
149
|
+
num_proc: Number of processes to use for loading the dataset or processing.
|
|
137
150
|
kwargs: Additional keyword arguments that are passed to the _evaluate_subset method.
|
|
138
151
|
|
|
139
152
|
Returns:
|
|
@@ -163,7 +176,7 @@ class AbsTask(ABC):
|
|
|
163
176
|
if not self.data_loaded:
|
|
164
177
|
self.load_data()
|
|
165
178
|
|
|
166
|
-
self.dataset = cast(dict[HFSubset, DatasetDict], self.dataset)
|
|
179
|
+
self.dataset = cast("dict[HFSubset, DatasetDict]", self.dataset)
|
|
167
180
|
|
|
168
181
|
scores = {}
|
|
169
182
|
if self.hf_subsets is None:
|
|
@@ -189,6 +202,7 @@ class AbsTask(ABC):
|
|
|
189
202
|
hf_subset=hf_subset,
|
|
190
203
|
encode_kwargs=encode_kwargs,
|
|
191
204
|
prediction_folder=prediction_folder,
|
|
205
|
+
num_proc=num_proc,
|
|
192
206
|
**kwargs,
|
|
193
207
|
)
|
|
194
208
|
self._add_main_score(scores[hf_subset])
|
|
@@ -204,6 +218,7 @@ class AbsTask(ABC):
|
|
|
204
218
|
hf_subset: str,
|
|
205
219
|
encode_kwargs: EncodeKwargs,
|
|
206
220
|
prediction_folder: Path | None = None,
|
|
221
|
+
num_proc: int = 1,
|
|
207
222
|
**kwargs: Any,
|
|
208
223
|
) -> ScoresDict:
|
|
209
224
|
raise NotImplementedError(
|
|
@@ -308,11 +323,15 @@ class AbsTask(ABC):
|
|
|
308
323
|
) # only take the specified test split.
|
|
309
324
|
return dataset_dict
|
|
310
325
|
|
|
311
|
-
def load_data(self) -> None:
|
|
326
|
+
def load_data(self, num_proc: int = 1, **kwargs: Any) -> None:
|
|
312
327
|
"""Loads dataset from HuggingFace hub
|
|
313
328
|
|
|
314
329
|
This is the main loading function for Task. Do not overwrite this, instead we recommend using `dataset_transform`, which is called after the
|
|
315
330
|
dataset is loaded using `datasets.load_dataset`.
|
|
331
|
+
|
|
332
|
+
Args:
|
|
333
|
+
num_proc: Number of processes to use for loading the dataset.
|
|
334
|
+
kwargs: Additional keyword arguments passed to the load_dataset function. Keep for forward compatibility.
|
|
316
335
|
"""
|
|
317
336
|
if self.data_loaded:
|
|
318
337
|
return
|
|
@@ -325,11 +344,12 @@ class AbsTask(ABC):
|
|
|
325
344
|
self.dataset[hf_subset] = load_dataset(
|
|
326
345
|
name=hf_subset,
|
|
327
346
|
**self.metadata.dataset,
|
|
347
|
+
num_proc=num_proc,
|
|
328
348
|
)
|
|
329
349
|
else:
|
|
330
350
|
# some of monolingual datasets explicitly adding the split name to the dataset name
|
|
331
|
-
self.dataset = load_dataset(**self.metadata.dataset)
|
|
332
|
-
self.dataset_transform()
|
|
351
|
+
self.dataset = load_dataset(**self.metadata.dataset, num_proc=num_proc)
|
|
352
|
+
self.dataset_transform(num_proc=num_proc)
|
|
333
353
|
self.data_loaded = True
|
|
334
354
|
|
|
335
355
|
def fast_load(self) -> None:
|
|
@@ -352,12 +372,13 @@ class AbsTask(ABC):
|
|
|
352
372
|
self.dataset[lang] = DatasetDict(subset)
|
|
353
373
|
|
|
354
374
|
def calculate_descriptive_statistics(
|
|
355
|
-
self, overwrite_results: bool = False
|
|
375
|
+
self, overwrite_results: bool = False, num_proc: int = 1
|
|
356
376
|
) -> dict[str, DescriptiveStatistics]:
|
|
357
377
|
"""Calculates descriptive statistics from the dataset.
|
|
358
378
|
|
|
359
379
|
Args:
|
|
360
380
|
overwrite_results: Whether to overwrite existing results. If False and results already exist, the existing results will be loaded from cache.
|
|
381
|
+
num_proc: Number of processes to use for loading the dataset.
|
|
361
382
|
|
|
362
383
|
Returns:
|
|
363
384
|
A dictionary containing descriptive statistics for each split.
|
|
@@ -371,7 +392,7 @@ class AbsTask(ABC):
|
|
|
371
392
|
return existing_stats
|
|
372
393
|
|
|
373
394
|
if not self.data_loaded:
|
|
374
|
-
self.load_data()
|
|
395
|
+
self.load_data(num_proc=num_proc)
|
|
375
396
|
|
|
376
397
|
descriptive_stats: dict[str, DescriptiveStatistics] = {}
|
|
377
398
|
hf_subset_stat: Literal["hf_subset_descriptive_stats"] = (
|
|
@@ -509,7 +530,7 @@ class AbsTask(ABC):
|
|
|
509
530
|
scores["main_score"] = scores[self.metadata.main_score]
|
|
510
531
|
|
|
511
532
|
def _upload_dataset_to_hub(
|
|
512
|
-
self, repo_name: str, fields: list[str] | dict[str, str]
|
|
533
|
+
self, repo_name: str, fields: list[str] | dict[str, str], num_proc: int = 1
|
|
513
534
|
) -> None:
|
|
514
535
|
if self.dataset is None:
|
|
515
536
|
raise ValueError("Dataset not loaded")
|
|
@@ -534,7 +555,10 @@ class AbsTask(ABC):
|
|
|
534
555
|
)
|
|
535
556
|
sentences = DatasetDict(sentences)
|
|
536
557
|
sentences.push_to_hub(
|
|
537
|
-
repo_name,
|
|
558
|
+
repo_name,
|
|
559
|
+
config,
|
|
560
|
+
commit_message=f"Add {config} dataset",
|
|
561
|
+
num_proc=num_proc,
|
|
538
562
|
)
|
|
539
563
|
else:
|
|
540
564
|
sentences = {}
|
|
@@ -551,16 +575,19 @@ class AbsTask(ABC):
|
|
|
551
575
|
{field: self.dataset[split][field] for field in fields}
|
|
552
576
|
)
|
|
553
577
|
sentences = DatasetDict(sentences)
|
|
554
|
-
sentences.push_to_hub(
|
|
578
|
+
sentences.push_to_hub(
|
|
579
|
+
repo_name, commit_message="Add dataset", num_proc=num_proc
|
|
580
|
+
)
|
|
555
581
|
|
|
556
|
-
def _push_dataset_to_hub(self, repo_name: str) -> None:
|
|
582
|
+
def _push_dataset_to_hub(self, repo_name: str, num_proc: int = 1) -> None:
|
|
557
583
|
raise NotImplementedError
|
|
558
584
|
|
|
559
|
-
def push_dataset_to_hub(self, repo_name: str) -> None:
|
|
585
|
+
def push_dataset_to_hub(self, repo_name: str, num_proc: int = 1) -> None:
|
|
560
586
|
"""Push the dataset to the HuggingFace Hub.
|
|
561
587
|
|
|
562
588
|
Args:
|
|
563
589
|
repo_name: The name of the repository to push the dataset to.
|
|
590
|
+
num_proc: Number of processes to use for loading the dataset.
|
|
564
591
|
|
|
565
592
|
Examples:
|
|
566
593
|
>>> import mteb
|
|
@@ -572,7 +599,7 @@ class AbsTask(ABC):
|
|
|
572
599
|
if not self.data_loaded:
|
|
573
600
|
self.load_data()
|
|
574
601
|
|
|
575
|
-
self._push_dataset_to_hub(repo_name)
|
|
602
|
+
self._push_dataset_to_hub(repo_name, num_proc)
|
|
576
603
|
# dataset repo not creating when pushing card
|
|
577
604
|
self.metadata.push_dataset_card_to_hub(repo_name)
|
|
578
605
|
|
|
@@ -1,28 +1,39 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
4
|
from datetime import datetime
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
3
6
|
|
|
4
7
|
from pydantic import ConfigDict, Field, model_validator
|
|
5
|
-
from typing_extensions import Self
|
|
6
8
|
|
|
7
9
|
from mteb.types import (
|
|
8
|
-
ISOLanguageScript,
|
|
9
10
|
Languages,
|
|
10
|
-
Licenses,
|
|
11
|
-
Modalities,
|
|
12
|
-
StrDate,
|
|
13
11
|
)
|
|
14
12
|
|
|
15
13
|
from .abstask import AbsTask
|
|
16
14
|
from .task_metadata import (
|
|
17
|
-
AnnotatorType,
|
|
18
15
|
MetadataDatasetDict,
|
|
19
|
-
SampleCreationMethod,
|
|
20
|
-
TaskDomain,
|
|
21
16
|
TaskMetadata,
|
|
22
|
-
TaskSubtype,
|
|
23
17
|
TaskType,
|
|
24
18
|
)
|
|
25
19
|
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from typing_extensions import Self
|
|
22
|
+
|
|
23
|
+
from mteb.types import (
|
|
24
|
+
ISOLanguageScript,
|
|
25
|
+
Licenses,
|
|
26
|
+
Modalities,
|
|
27
|
+
StrDate,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
from .task_metadata import (
|
|
31
|
+
AnnotatorType,
|
|
32
|
+
SampleCreationMethod,
|
|
33
|
+
TaskDomain,
|
|
34
|
+
TaskSubtype,
|
|
35
|
+
)
|
|
36
|
+
|
|
26
37
|
logger = logging.getLogger(__name__)
|
|
27
38
|
|
|
28
39
|
|
mteb/abstasks/aggregated_task.py
CHANGED
|
@@ -1,19 +1,26 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
4
|
import warnings
|
|
3
|
-
from
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
from typing import Any
|
|
5
|
+
from typing import TYPE_CHECKING, Any
|
|
6
6
|
|
|
7
7
|
import numpy as np
|
|
8
|
-
from datasets import Dataset, DatasetDict
|
|
9
8
|
|
|
10
|
-
from mteb.models.models_protocols import MTEBModels
|
|
11
9
|
from mteb.results.task_result import TaskResult
|
|
12
|
-
from mteb.types import EncodeKwargs, HFSubset, ScoresDict
|
|
13
|
-
from mteb.types.statistics import DescriptiveStatistics
|
|
14
10
|
|
|
15
11
|
from .abstask import AbsTask
|
|
16
|
-
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from collections.abc import Mapping
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
from datasets import Dataset, DatasetDict
|
|
18
|
+
|
|
19
|
+
from mteb.models.models_protocols import MTEBModels
|
|
20
|
+
from mteb.types import EncodeKwargs, HFSubset, ScoresDict
|
|
21
|
+
from mteb.types.statistics import DescriptiveStatistics
|
|
22
|
+
|
|
23
|
+
from .aggregate_task_metadata import AggregateTaskMetadata
|
|
17
24
|
|
|
18
25
|
logger = logging.getLogger(__name__)
|
|
19
26
|
|