mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
- mteb/__init__.py +2 -0
- mteb/_create_dataloaders.py +78 -30
- mteb/_evaluators/any_sts_evaluator.py +13 -6
- mteb/_evaluators/clustering_evaluator.py +13 -5
- mteb/_evaluators/evaluator.py +12 -4
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
- mteb/_evaluators/pair_classification_evaluator.py +17 -7
- mteb/_evaluators/retrieval_evaluator.py +23 -14
- mteb/_evaluators/retrieval_metrics.py +26 -19
- mteb/_evaluators/sklearn_evaluator.py +27 -17
- mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
- mteb/_evaluators/text/summarization_evaluator.py +31 -20
- mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
- mteb/_helpful_enum.py +5 -1
- mteb/abstasks/_data_filter/filters.py +9 -3
- mteb/abstasks/_data_filter/task_pipelines.py +10 -2
- mteb/abstasks/_statistics_calculation.py +21 -11
- mteb/abstasks/_stratification.py +18 -18
- mteb/abstasks/abstask.py +78 -44
- mteb/abstasks/aggregate_task_metadata.py +21 -18
- mteb/abstasks/aggregated_task.py +23 -35
- mteb/abstasks/classification.py +39 -18
- mteb/abstasks/clustering.py +37 -20
- mteb/abstasks/clustering_legacy.py +30 -16
- mteb/abstasks/image/image_text_pair_classification.py +26 -9
- mteb/abstasks/multilabel_classification.py +33 -21
- mteb/abstasks/pair_classification.py +44 -19
- mteb/abstasks/regression.py +18 -10
- mteb/abstasks/retrieval.py +82 -52
- mteb/abstasks/retrieval_dataset_loaders.py +50 -39
- mteb/abstasks/sts.py +34 -15
- mteb/abstasks/task_metadata.py +44 -37
- mteb/abstasks/text/bitext_mining.py +57 -35
- mteb/abstasks/text/reranking.py +10 -8
- mteb/abstasks/text/summarization.py +26 -10
- mteb/abstasks/zeroshot_classification.py +27 -9
- mteb/benchmarks/_create_table.py +13 -7
- mteb/benchmarks/benchmark.py +15 -3
- mteb/benchmarks/benchmarks/__init__.py +6 -0
- mteb/benchmarks/benchmarks/benchmarks.py +153 -13
- mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
- mteb/benchmarks/get_benchmark.py +14 -55
- mteb/cache.py +189 -31
- mteb/cli/_display_tasks.py +10 -4
- mteb/cli/build_cli.py +112 -13
- mteb/cli/generate_model_card.py +50 -23
- mteb/deprecated_evaluator.py +72 -54
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
- mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
- mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
- mteb/evaluate.py +71 -47
- mteb/filter_tasks.py +36 -32
- mteb/get_tasks.py +37 -33
- mteb/languages/language_scripts.py +11 -4
- mteb/leaderboard/app.py +172 -37
- mteb/leaderboard/table.py +7 -2
- mteb/load_results.py +20 -14
- mteb/models/abs_encoder.py +30 -16
- mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
- mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
- mteb/models/cache_wrappers/cache_wrapper.py +16 -11
- mteb/models/get_model_meta.py +53 -9
- mteb/models/instruct_wrapper.py +41 -13
- mteb/models/model_implementations/align_models.py +11 -5
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +6 -4
- mteb/models/model_implementations/ara_models.py +2 -1
- mteb/models/model_implementations/arctic_models.py +16 -8
- mteb/models/model_implementations/b1ade_models.py +2 -1
- mteb/models/model_implementations/bedrock_models.py +20 -6
- mteb/models/model_implementations/bge_models.py +85 -22
- mteb/models/model_implementations/bica_model.py +4 -3
- mteb/models/model_implementations/blip2_models.py +13 -6
- mteb/models/model_implementations/blip_models.py +33 -20
- mteb/models/model_implementations/bm25.py +27 -17
- mteb/models/model_implementations/bmretriever_models.py +16 -6
- mteb/models/model_implementations/cadet_models.py +2 -1
- mteb/models/model_implementations/cde_models.py +22 -9
- mteb/models/model_implementations/clip_models.py +18 -10
- mteb/models/model_implementations/clips_models.py +6 -3
- mteb/models/model_implementations/codefuse_models.py +10 -5
- mteb/models/model_implementations/codesage_models.py +6 -3
- mteb/models/model_implementations/cohere_models.py +19 -9
- mteb/models/model_implementations/cohere_v.py +16 -6
- mteb/models/model_implementations/colpali_models.py +10 -6
- mteb/models/model_implementations/colqwen_models.py +24 -38
- mteb/models/model_implementations/colsmol_models.py +5 -3
- mteb/models/model_implementations/conan_models.py +12 -5
- mteb/models/model_implementations/dino_models.py +70 -46
- mteb/models/model_implementations/e5_instruct.py +27 -4
- mteb/models/model_implementations/e5_models.py +18 -9
- mteb/models/model_implementations/e5_v.py +16 -10
- mteb/models/model_implementations/eagerworks_models.py +12 -5
- mteb/models/model_implementations/emillykkejensen_models.py +9 -6
- mteb/models/model_implementations/en_code_retriever.py +2 -1
- mteb/models/model_implementations/euler_models.py +3 -2
- mteb/models/model_implementations/evaclip_models.py +13 -4
- mteb/models/model_implementations/fa_models.py +18 -9
- mteb/models/model_implementations/facebookai.py +16 -2
- mteb/models/model_implementations/geogpt_models.py +2 -1
- mteb/models/model_implementations/gme_v_models.py +13 -8
- mteb/models/model_implementations/google_models.py +16 -5
- mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
- mteb/models/model_implementations/gritlm_models.py +5 -2
- mteb/models/model_implementations/gte_models.py +34 -13
- mteb/models/model_implementations/hinvec_models.py +7 -2
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +36 -6
- mteb/models/model_implementations/inf_models.py +4 -2
- mteb/models/model_implementations/jasper_models.py +16 -7
- mteb/models/model_implementations/jina_clip.py +58 -14
- mteb/models/model_implementations/jina_models.py +35 -16
- mteb/models/model_implementations/kalm_models.py +24 -12
- mteb/models/model_implementations/kblab.py +13 -6
- mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
- mteb/models/model_implementations/kfst.py +2 -1
- mteb/models/model_implementations/kowshik24_models.py +2 -1
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +2 -1
- mteb/models/model_implementations/linq_models.py +8 -2
- mteb/models/model_implementations/listconranker.py +11 -5
- mteb/models/model_implementations/llm2clip_models.py +18 -10
- mteb/models/model_implementations/llm2vec_models.py +28 -14
- mteb/models/model_implementations/mcinext_models.py +12 -3
- mteb/models/model_implementations/mdbr_models.py +19 -3
- mteb/models/model_implementations/misc_models.py +131 -68
- mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
- mteb/models/model_implementations/mme5_models.py +3 -2
- mteb/models/model_implementations/moco_models.py +15 -8
- mteb/models/model_implementations/mod_models.py +3 -2
- mteb/models/model_implementations/model2vec_models.py +37 -18
- mteb/models/model_implementations/moka_models.py +4 -1
- mteb/models/model_implementations/nbailab.py +6 -3
- mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
- mteb/models/model_implementations/nomic_models.py +47 -19
- mteb/models/model_implementations/nomic_models_vision.py +6 -4
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
- mteb/models/model_implementations/nvidia_models.py +165 -22
- mteb/models/model_implementations/octen_models.py +64 -3
- mteb/models/model_implementations/openai_models.py +14 -4
- mteb/models/model_implementations/openclip_models.py +30 -17
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
- mteb/models/model_implementations/ops_moa_models.py +10 -3
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
- mteb/models/model_implementations/pawan_models.py +2 -1
- mteb/models/model_implementations/piccolo_models.py +3 -1
- mteb/models/model_implementations/pixie_models.py +56 -0
- mteb/models/model_implementations/promptriever_models.py +20 -10
- mteb/models/model_implementations/pylate_models.py +41 -21
- mteb/models/model_implementations/qodo_models.py +4 -2
- mteb/models/model_implementations/qtack_models.py +2 -1
- mteb/models/model_implementations/qwen3_models.py +14 -4
- mteb/models/model_implementations/qzhou_models.py +4 -2
- mteb/models/model_implementations/random_baseline.py +7 -6
- mteb/models/model_implementations/rasgaard_models.py +3 -2
- mteb/models/model_implementations/reasonir_model.py +66 -1
- mteb/models/model_implementations/repllama_models.py +18 -9
- mteb/models/model_implementations/rerankers_custom.py +25 -10
- mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
- mteb/models/model_implementations/richinfoai_models.py +2 -1
- mteb/models/model_implementations/ru_sentence_models.py +40 -20
- mteb/models/model_implementations/ruri_models.py +20 -10
- mteb/models/model_implementations/salesforce_models.py +13 -4
- mteb/models/model_implementations/samilpwc_models.py +2 -1
- mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
- mteb/models/model_implementations/searchmap_models.py +2 -1
- mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
- mteb/models/model_implementations/seed_models.py +2 -1
- mteb/models/model_implementations/sentence_transformers_models.py +142 -22
- mteb/models/model_implementations/shuu_model.py +2 -1
- mteb/models/model_implementations/siglip_models.py +39 -24
- mteb/models/model_implementations/slm_models.py +419 -0
- mteb/models/model_implementations/sonar_models.py +2 -1
- mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
- mteb/models/model_implementations/stella_models.py +23 -4
- mteb/models/model_implementations/tarka_models.py +4 -2
- mteb/models/model_implementations/text2vec_models.py +12 -3
- mteb/models/model_implementations/ua_sentence_models.py +2 -1
- mteb/models/model_implementations/uae_models.py +17 -5
- mteb/models/model_implementations/vdr_models.py +9 -2
- mteb/models/model_implementations/vi_vn_models.py +12 -6
- mteb/models/model_implementations/vista_models.py +11 -4
- mteb/models/model_implementations/vlm2vec_models.py +14 -7
- mteb/models/model_implementations/voyage_models.py +136 -4
- mteb/models/model_implementations/voyage_v.py +17 -10
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +2 -1
- mteb/models/model_implementations/yuan_models.py +2 -1
- mteb/models/model_implementations/yuan_models_en.py +3 -2
- mteb/models/model_meta.py +127 -40
- mteb/models/models_protocols.py +43 -22
- mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
- mteb/models/search_wrappers.py +63 -29
- mteb/models/sentence_transformer_wrapper.py +52 -26
- mteb/models/vllm_wrapper.py +329 -0
- mteb/py.typed +0 -0
- mteb/results/benchmark_results.py +48 -35
- mteb/results/model_result.py +68 -32
- mteb/results/task_result.py +110 -72
- mteb/similarity_functions.py +19 -9
- mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
- mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
- mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
- mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
- mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
- mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
- mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
- mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
- mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
- mteb/tasks/classification/est/estonian_valence.py +2 -2
- mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
- mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
- mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
- mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
- mteb/tasks/classification/fra/french_book_reviews.py +2 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
- mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
- mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
- mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
- mteb/tasks/classification/jpn/wrime_classification.py +1 -1
- mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
- mteb/tasks/classification/kor/klue_tc.py +2 -2
- mteb/tasks/classification/kor/kor_fin.py +1 -1
- mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
- mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
- mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
- mteb/tasks/classification/multilingual/scala_classification.py +2 -2
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
- mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
- mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
- mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
- mteb/tasks/classification/ory/odia_news_classification.py +2 -2
- mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
- mteb/tasks/classification/ron/moroco.py +1 -1
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
- mteb/tasks/classification/rus/georeview_classification.py +1 -1
- mteb/tasks/classification/rus/headline_classification.py +2 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
- mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
- mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
- mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
- mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
- mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
- mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
- mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
- mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
- mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
- mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
- mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nob/snl_clustering.py +8 -3
- mteb/tasks/clustering/nob/vg_clustering.py +8 -3
- mteb/tasks/clustering/pol/polish_clustering.py +3 -3
- mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
- mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
- mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
- mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
- mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
- mteb/tasks/multichoice/eng/cv_bench.py +4 -4
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
- mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
- mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
- mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
- mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
- mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
- mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
- mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
- mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
- mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
- mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
- mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
- mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
- mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
- mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
- mteb/tasks/pair_classification/rus/terra.py +2 -2
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
- mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
- mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
- mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_rag.py +16 -16
- mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
- mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
- mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
- mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
- mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
- mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
- mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
- mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
- mteb/tasks/retrieval/eng/__init__.py +44 -0
- mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
- mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
- mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
- mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/ml_questions.py +1 -1
- mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
- mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
- mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
- mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
- mteb/tasks/retrieval/kor/__init__.py +15 -1
- mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
- mteb/tasks/retrieval/multilingual/__init__.py +2 -0
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
- mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
- mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
- mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nob/norquad.py +3 -3
- mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
- mteb/tasks/retrieval/vie/__init__.py +14 -6
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
- mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
- mteb/tasks/sts/fao/faroese_sts.py +1 -1
- mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
- mteb/tasks/sts/kor/klue_sts.py +1 -1
- mteb/tasks/sts/por/sick_br_sts.py +1 -1
- mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
- mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
- mteb/types/__init__.py +2 -0
- mteb/types/_encoder_io.py +13 -1
- mteb/types/_result.py +2 -1
- mteb/types/statistics.py +18 -5
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
- mteb/models/model_implementations/mxbai_models.py +0 -111
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
mteb/abstasks/retrieval_dataset_loaders.py
CHANGED

@@ -1,5 +1,7 @@
+from __future__ import annotations
+
 import logging
-from typing import TypedDict
+from typing import TYPE_CHECKING, TypedDict
 
 from datasets import (
     Dataset,
@@ -11,13 +13,14 @@ from datasets import (
     load_dataset,
 )
 
-from mteb.types import (
-    CorpusDatasetType,
-    InstructionDatasetType,
-    QueryDatasetType,
-    RelevantDocumentsType,
-    TopRankedDocumentsType,
-)
+if TYPE_CHECKING:
+    from mteb.types import (
+        CorpusDatasetType,
+        InstructionDatasetType,
+        QueryDatasetType,
+        RelevantDocumentsType,
+        TopRankedDocumentsType,
+    )
 
 logger = logging.getLogger(__name__)
 
@@ -73,28 +76,36 @@ class RetrievalDatasetLoader:
         self.config = config if config != "default" else None
         self.dataset_configs = get_dataset_config_names(self.hf_repo, self.revision)
 
-    def load(
+    def load(
+        self,
+        num_proc: int = 1,
+    ) -> RetrievalSplitData:
         """Loads the dataset split for the specified configuration.
 
+        Args:
+            num_proc: The number of processes to use.
+
         Returns:
             A dictionary containing the corpus, queries, relevant documents, instructions (if applicable), and top-ranked documents (if applicable).
         """
         top_ranked = None
 
-        qrels = self._load_qrels()
-        corpus = self._load_corpus()
-        queries = self._load_queries()
+        qrels = self._load_qrels(num_proc)
+        corpus = self._load_corpus(num_proc)
+        queries = self._load_queries(num_proc)
 
         queries = queries.filter(
             lambda x: x["id"] in qrels.keys(), desc="Filtering queries by qrels"
         )
 
         if any(c.endswith("top_ranked") for c in self.dataset_configs):
-            top_ranked = self._load_top_ranked()
+            top_ranked = self._load_top_ranked(num_proc)
 
         if any(c.endswith("instruction") for c in self.dataset_configs):
-            instructions = self._load_instructions()
-            queries = _combine_queries_with_instructions_datasets(
+            instructions = self._load_instructions(num_proc)
+            queries = _combine_queries_with_instructions_datasets(
+                queries, instructions, num_proc
+            )
 
         return RetrievalSplitData(
             corpus=corpus,
@@ -117,49 +128,50 @@ class RetrievalDatasetLoader:
                 f"Split {self.split} not found in {splits}. Please specify a valid split."
             )
 
-    def _load_dataset_split(self, config: str) -> Dataset:
+    def _load_dataset_split(self, config: str, num_proc: int) -> Dataset:
         return load_dataset(
             self.hf_repo,
             config,
             split=self._get_split(config),
             trust_remote_code=self.trust_remote_code,
             revision=self.revision,
+            num_proc=num_proc,
         )
 
-    def _load_corpus(self) -> CorpusDatasetType:
-        logger.info("Loading Corpus...")
-
+    def _load_corpus(self, num_proc: int) -> CorpusDatasetType:
         config = f"{self.config}-corpus" if self.config is not None else "corpus"
-        corpus_ds = self._load_dataset_split(config)
+        logger.info("Loading corpus subset: %s", config)
+
+        corpus_ds = self._load_dataset_split(config, num_proc)
         if "_id" in corpus_ds.column_names:
             corpus_ds = corpus_ds.cast_column("_id", Value("string")).rename_column(
                 "_id", "id"
             )
         logger.info("Loaded %d %s Documents.", len(corpus_ds), self.split.upper())
-        logger.
+        logger.debug("Doc Example: %s", corpus_ds[0])
         return corpus_ds
 
-    def _load_queries(self) -> QueryDatasetType:
-        logger.info("Loading Queries...")
-
+    def _load_queries(self, num_proc: int) -> QueryDatasetType:
         config = f"{self.config}-queries" if self.config is not None else "queries"
+        logger.info("Loading queries subset: %s", config)
+
         if "query" in self.dataset_configs:
             config = "query"
-        queries_ds = self._load_dataset_split(config)
+        queries_ds = self._load_dataset_split(config, num_proc)
         if "_id" in queries_ds.column_names:
            queries_ds = queries_ds.cast_column("_id", Value("string")).rename_column(
                 "_id", "id"
             )
 
         logger.info("Loaded %d %s queries.", len(queries_ds), self.split.upper())
-        logger.
+        logger.debug("Query Example: %s", queries_ds[0])
 
         return queries_ds
 
-    def _load_qrels(self) -> RelevantDocumentsType:
-        logger.info("Loading qrels...")
-
+    def _load_qrels(self, num_proc: int) -> RelevantDocumentsType:
        config = f"{self.config}-qrels" if self.config is not None else "default"
+
+        logger.info("Loading qrels subset: %s", config)
         if config == "default" and config not in self.dataset_configs:
             if "qrels" in self.dataset_configs:
                 config = "qrels"
@@ -168,7 +180,7 @@ class RetrievalDatasetLoader:
                 "No qrels or default config found. Please specify a valid config or ensure the dataset has qrels."
             )
 
-        qrels_ds = self._load_dataset_split(config)
+        qrels_ds = self._load_dataset_split(config, num_proc)
         qrels_ds = qrels_ds.select_columns(["query-id", "corpus-id", "score"])
 
         qrels_ds = qrels_ds.cast(
@@ -191,13 +203,12 @@ class RetrievalDatasetLoader:
         logger.info("Loaded %d %s qrels.", len(qrels_dict), self.split.upper())
         return qrels_dict
 
-    def _load_top_ranked(self) -> TopRankedDocumentsType:
-        logger.info("Loading Top Ranked")
-
+    def _load_top_ranked(self, num_proc: int) -> TopRankedDocumentsType:
         config = (
             f"{self.config}-top_ranked" if self.config is not None else "top_ranked"
         )
-        top_ranked_ds = self._load_dataset_split(config)
+        logger.info("Loading top ranked subset: %s", config)
+        top_ranked_ds = self._load_dataset_split(config, num_proc)
         top_ranked_ds = top_ranked_ds.cast(
             Features(
                 {
@@ -215,13 +226,12 @@ class RetrievalDatasetLoader:
         logger.info(f"Top ranked loaded: {len(top_ranked_ds)}")
         return top_ranked_dict
 
-    def _load_instructions(self) -> InstructionDatasetType:
-        logger.info("Loading Instructions")
-
+    def _load_instructions(self, num_proc: int) -> InstructionDatasetType:
         config = (
             f"{self.config}-instruction" if self.config is not None else "instruction"
         )
-        instructions_ds = self._load_dataset_split(config)
+        logger.info("Loading instruction subset: %s", config)
+        instructions_ds = self._load_dataset_split(config, num_proc)
         instructions_ds = instructions_ds.cast(
             Features(
                 {
@@ -236,6 +246,7 @@ class RetrievalDatasetLoader:
 def _combine_queries_with_instructions_datasets(
     queries_dataset: QueryDatasetType,
     instruction_dataset: InstructionDatasetType | dict[str, str],
+    num_proc: int,
 ) -> Dataset:
     if isinstance(instruction_dataset, Dataset):
         instruction_to_query_idx = {
@@ -248,4 +259,4 @@ def _combine_queries_with_instructions_datasets(
         row["instruction"] = instruction_to_query_idx[row["id"]]
         return row
 
-    return queries_dataset.map(_add_instruction_to_query)
+    return queries_dataset.map(_add_instruction_to_query, num_proc=num_proc)
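The recurring change in this file is a new num_proc argument threaded from RetrievalDatasetLoader.load() down into the Hugging Face datasets calls. A minimal, self-contained sketch of that pattern follows; the toy dataset and the add_instruction helper are illustrative and not part of mteb.

from datasets import Dataset


def add_instruction(row: dict) -> dict:
    # Illustrative stand-in for mteb's _add_instruction_to_query helper.
    row["instruction"] = f"Find passages that answer: {row['text']}"
    return row


if __name__ == "__main__":
    queries = Dataset.from_dict(
        {"id": ["q1", "q2"], "text": ["what is a qrel?", "what is a corpus?"]}
    )
    # Dataset.map accepts num_proc, so row-wise preprocessing can run in parallel,
    # mirroring queries_dataset.map(_add_instruction_to_query, num_proc=num_proc).
    queries = queries.map(add_instruction, num_proc=2)
    # datasets.load_dataset also takes num_proc; that is what the new
    # _load_dataset_split(config, num_proc) forwards it to in the diff above.
    print(queries[0])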
mteb/abstasks/sts.py
CHANGED
@@ -1,19 +1,14 @@
+from __future__ import annotations
+
 import logging
-from
-from typing import Any, TypedDict, cast
+from typing import TYPE_CHECKING, Any, TypedDict, cast
 
-from datasets import Dataset
 from scipy.stats import pearsonr, spearmanr
 
 from mteb._evaluators import AnySTSEvaluator
-from mteb._evaluators.any_sts_evaluator import STSEvaluatorScores
 from mteb.models import EncoderProtocol
-from mteb.types import PromptType
 from mteb.types.statistics import (
-    ImageStatistics,
-    ScoreStatistics,
     SplitDescriptiveStatistics,
-    TextStatistics,
 )
 
 from ._statistics_calculation import (
@@ -23,6 +18,20 @@ from ._statistics_calculation import (
 )
 from .abstask import AbsTask
 
+if TYPE_CHECKING:
+    from pathlib import Path
+
+    from datasets import Dataset
+
+    from mteb._evaluators.any_sts_evaluator import STSEvaluatorScores
+    from mteb.models import MTEBModels
+    from mteb.types import EncodeKwargs, PromptType
+    from mteb.types.statistics import (
+        ImageStatistics,
+        ScoreStatistics,
+        TextStatistics,
+    )
+
 logger = logging.getLogger(__name__)
 
 
@@ -103,14 +112,18 @@ class AbsTaskSTS(AbsTask):
 
     def _evaluate_subset(
         self,
-        model:
+        model: MTEBModels,
         data_split: Dataset,
-        encode_kwargs:
+        encode_kwargs: EncodeKwargs,
         hf_split: str,
         hf_subset: str,
         prediction_folder: Path | None = None,
+        num_proc: int = 1,
         **kwargs: Any,
     ) -> STSMetrics:
+        if not isinstance(model, EncoderProtocol):
+            raise TypeError("Expected model to be an instance of EncoderProtocol")
+
         normalized_scores = list(map(self._normalize, data_split["score"]))
         data_split = data_split.select_columns(list(self.column_names))
 
@@ -124,7 +137,11 @@ class AbsTaskSTS(AbsTask):
             input2_prompt_type=self.input2_prompt_type,
             **kwargs,
         )
-        scores = evaluator(
+        scores = evaluator(
+            model,
+            encode_kwargs=encode_kwargs,
+            num_proc=num_proc,
+        )
 
         if prediction_folder:
             self._save_task_predictions(
@@ -142,7 +159,7 @@ class AbsTaskSTS(AbsTask):
     ) -> STSMetrics:
         def compute_corr(x: list[float], y: list[float]) -> tuple[float, float]:
             """Return (pearson, spearman) correlations between x and y."""
-            return pearsonr(x, y)[0], spearmanr(x, y)[0]
+            return float(pearsonr(x, y)[0]), float(spearmanr(x, y)[0])
 
         cosine_pearson, cosine_spearman = compute_corr(
             normalized_scores, scores["cosine_scores"]
@@ -179,7 +196,7 @@ class AbsTaskSTS(AbsTask):
         self, split: str, hf_subset: str | None = None, compute_overall: bool = False
     ) -> AnySTSDescriptiveStatistics:
         first_column, second_column = self.column_names
-        self.dataset = cast(dict[str, dict[str, Dataset]], self.dataset)
+        self.dataset = cast("dict[str, dict[str, Dataset]]", self.dataset)
 
         if hf_subset:
             sentence1 = self.dataset[hf_subset][split][first_column]
@@ -233,9 +250,11 @@ class AbsTaskSTS(AbsTask):
             label_statistics=labels_statistics,
         )
 
-    def _push_dataset_to_hub(self, repo_name: str) -> None:
+    def _push_dataset_to_hub(self, repo_name: str, num_proc: int = 1) -> None:
         self._upload_dataset_to_hub(
-            repo_name,
+            repo_name,
+            [self.column_names[0], self.column_names[1], "score"],
+            num_proc=num_proc,
         )
 
     def _normalize(self, x: float) -> float:
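Most of the churn above is the same typing cleanup applied across this release: from __future__ import annotations plus a TYPE_CHECKING block, so type-only imports (Path, Dataset, EncodeKwargs, ...) no longer run at import time, and cast() switches to its string form for the same reason. A small self-contained sketch of the pattern follows; the save_predictions function is illustrative, not mteb's API.

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Only evaluated by static type checkers; never imported at runtime.
    from pathlib import Path


def save_predictions(folder: Path | None = None) -> str:
    # With postponed evaluation the annotation stays a string, so the
    # absence of a runtime import of Path is never a problem.
    return "no prediction folder given" if folder is None else str(folder)


print(save_predictions())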
mteb/abstasks/task_metadata.py
CHANGED
@@ -1,8 +1,10 @@
+from __future__ import annotations
+
 import json
 import logging
 from collections.abc import Sequence
 from pathlib import Path
-from typing import Any, Literal
+from typing import TYPE_CHECKING, Any, Literal, cast
 
 from huggingface_hub import (
     DatasetCard,
@@ -16,13 +18,11 @@ from pydantic import (
     ConfigDict,
     field_validator,
 )
-from typing_extensions import Required, TypedDict
+from typing_extensions import Required, TypedDict  # noqa: TC002
 
 import mteb
 from mteb.languages import check_language_code
 from mteb.types import (
-    HFSubset,
-    ISOLanguageScript,
     Languages,
     Licenses,
     Modalities,
@@ -30,7 +30,17 @@ from mteb.types import (
     StrDate,
     StrURL,
 )
-
+
+if TYPE_CHECKING:
+    from huggingface_hub import (
+        CardData,
+    )
+
+    from mteb.types import (
+        HFSubset,
+        ISOLanguageScript,
+    )
+    from mteb.types.statistics import DescriptiveStatistics
 
 logger = logging.getLogger(__name__)
 
@@ -150,7 +160,7 @@ _TASK_TYPE = (
     "InstructionReranking",
 ) + MIEB_TASK_TYPE
 
-TaskType = Literal[_TASK_TYPE]
+TaskType = Literal[_TASK_TYPE]  # type: ignore[valid-type]
 """The type of the task. E.g. includes "Classification", "Retrieval" and "Clustering"."""
 
 
@@ -192,8 +202,10 @@ AnnotatorType = Literal[
 """The type of the annotators. Is often important for understanding the quality of a dataset."""
 
 
-PromptDict = TypedDict(
-    "PromptDict",
+PromptDict = TypedDict(  # type: ignore[misc]
+    "PromptDict",
+    {prompt_type.value: str for prompt_type in PromptType},
+    total=False,
 )
 """A dictionary containing the prompt used for the task.
 
@@ -365,7 +377,7 @@ class TaskMetadata(BaseModel):
         """Return a dictionary mapping huggingface subsets to languages."""
         if isinstance(self.eval_langs, dict):
             return self.eval_langs
-        return {"default": self.eval_langs}
+        return {"default": cast("list[str]", self.eval_langs)}
 
     @property
     def intext_citation(self, include_cite: bool = True) -> str:
@@ -376,9 +388,8 @@ class TaskMetadata(BaseModel):
         if include_cite and cite:
             # check for whitespace in the citation
             if " " in cite:
-
-
-                )
+                msg = "Citation contains whitespace. Please ensure that the citation is correctly formatted."
+                logger.warning(msg)
             return f"\\cite{{{cite}}}"
         return cite
 
@@ -414,7 +425,7 @@ class TaskMetadata(BaseModel):
         for subset, subset_value in stats.items():
             if subset == "hf_subset_descriptive_stats":
                 continue
-            n_samples[subset] = subset_value["num_samples"]
+            n_samples[subset] = subset_value["num_samples"]
         return n_samples
 
     @property
@@ -447,7 +458,7 @@ class TaskMetadata(BaseModel):
         Raises:
             ValueError: If the prompt type is not recognized.
         """
-        if prompt_type is None:
+        if prompt_type is None or self.category is None:
             return self.modalities
         query_modalities, doc_modalities = self.category.split("2")
         category_to_modality: dict[str, Modalities] = {
@@ -467,7 +478,7 @@ class TaskMetadata(BaseModel):
 
     def _create_dataset_card_data(
         self,
-        existing_dataset_card_data:
+        existing_dataset_card_data: CardData | None = None,
     ) -> tuple[DatasetCardData, dict[str, Any]]:
         """Create a DatasetCardData object from the task metadata.
 
@@ -483,7 +494,6 @@ class TaskMetadata(BaseModel):
         dataset_type = [
             *self._hf_task_type(),
             *self._hf_task_category(),
-            *self._hf_subtypes(),
         ]
         languages = self._hf_languages()
 
@@ -502,12 +512,13 @@ class TaskMetadata(BaseModel):
 
         tags = ["mteb"] + self.modalities
 
-        descriptive_stats =
-        if descriptive_stats is not None:
-
+        descriptive_stats = ""
+        if self.descriptive_stats is not None:
+            descriptive_stats_ = self.descriptive_stats
+            for split, split_stat in descriptive_stats_.items():
                 if len(split_stat.get("hf_subset_descriptive_stats", {})) > 10:
                     split_stat.pop("hf_subset_descriptive_stats", {})
-            descriptive_stats = json.dumps(
+            descriptive_stats = json.dumps(descriptive_stats_, indent=4)
 
         dataset_card_data_params = existing_dataset_card_data.to_dict()
         # override the existing values
@@ -584,10 +595,8 @@ class TaskMetadata(BaseModel):
 
     def _hf_subtypes(self) -> list[str]:
         # to get full list of available task_ids execute
-        #
-        #
-        #     "repoType": "dataset"
-        # })
+        # https://huggingface.co/api/datasets-tags-by-type?type=task_ids
+        # ref https://huggingface-openapi.hf.space/#tag/datasets/GET/api/datasets-tags-by-type
         mteb_to_hf_subtype = {
             "Article retrieval": ["document-retrieval"],
             "Conversational retrieval": ["conversational", "utterance-retrieval"],
@@ -609,7 +618,7 @@ class TaskMetadata(BaseModel):
                 "hate-speech-detection",
             ],
             "Thematic clustering": [],
-            "Scientific Reranking": [],
+            "Scientific Reranking": ["text-scoring"],
             "Claim verification": ["fact-checking", "fact-checking-retrieval"],
             "Topic classification": ["topic-classification"],
             "Code retrieval": [],
@@ -617,21 +626,21 @@
             "Cross-Lingual Semantic Discrimination": [],
             "Textual Entailment": ["natural-language-inference"],
             "Counterfactual Detection": [],
-            "Emotion classification": [],
+            "Emotion classification": ["sentiment-classification"],
             "Reasoning as Retrieval": [],
             "Rendered Texts Understanding": [],
             "Image Text Retrieval": [],
             "Object recognition": [],
             "Scene recognition": [],
             "Caption Pairing": ["image-captioning"],
-            "Emotion recognition": [],
+            "Emotion recognition": ["sentiment-scoring"],
             "Textures recognition": [],
             "Activity recognition": [],
             "Tumor detection": [],
             "Duplicate Detection": [],
             "Rendered semantic textual similarity": [
                 "semantic-similarity-scoring",
-                "
+                "semantic-similarity-classification",
             ],
             "Intent classification": [
                 "intent-classification",
@@ -645,10 +654,8 @@ class TaskMetadata(BaseModel):
 
     def _hf_task_type(self) -> list[str]:
         # to get full list of task_types execute:
-        #
-        #
-        # }).json()
-        # or look at https://huggingface.co/tasks
+        # https://huggingface.co/api/datasets-tags-by-type?type=task_categories
+        # ref https://huggingface-openapi.hf.space/#tag/datasets/GET/api/datasets-tags-by-type
         mteb_task_type_to_datasets = {
             # Text
             "BitextMining": ["translation"],
@@ -667,7 +674,7 @@ class TaskMetadata(BaseModel):
             "Any2AnyRetrieval": ["visual-document-retrieval"],
             "Any2AnyMultilingualRetrieval": ["visual-document-retrieval"],
             "VisionCentricQA": ["visual-question-answering"],
-            "ImageClustering": ["image-
+            "ImageClustering": ["image-feature-extraction"],
             "ImageClassification": ["image-classification"],
             "ImageMultilabelClassification": ["image-classification"],
             "DocumentUnderstanding": ["visual-document-retrieval"],
@@ -695,11 +702,11 @@ class TaskMetadata(BaseModel):
 
     def _hf_languages(self) -> list[str]:
         languages: list[str] = []
-        if self.is_multilingual:
-            for val in
+        if self.is_multilingual and isinstance(self.eval_langs, dict):
+            for val in self.eval_langs.values():
                 languages.extend(val)
         else:
-            languages = self.eval_langs
+            languages = cast("list[str]", self.eval_langs)
         # value "python" is not valid. It must be an ISO 639-1, 639-2 or 639-3 code (two/three letters),
         # or a special value like "code", "multilingual".
         readme_langs = []
@@ -711,7 +718,7 @@ class TaskMetadata(BaseModel):
             readme_langs.append(lang_name)
         return sorted(set(readme_langs))
 
-    def _hf_license(self) -> str:
+    def _hf_license(self) -> str | None:
         dataset_license = self.license
         if dataset_license:
             license_mapping = {