mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/__init__.py +2 -0
- mteb/_create_dataloaders.py +78 -30
- mteb/_evaluators/any_sts_evaluator.py +13 -6
- mteb/_evaluators/clustering_evaluator.py +13 -5
- mteb/_evaluators/evaluator.py +12 -4
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
- mteb/_evaluators/pair_classification_evaluator.py +17 -7
- mteb/_evaluators/retrieval_evaluator.py +23 -14
- mteb/_evaluators/retrieval_metrics.py +26 -19
- mteb/_evaluators/sklearn_evaluator.py +27 -17
- mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
- mteb/_evaluators/text/summarization_evaluator.py +31 -20
- mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
- mteb/_helpful_enum.py +5 -1
- mteb/abstasks/_data_filter/filters.py +9 -3
- mteb/abstasks/_data_filter/task_pipelines.py +10 -2
- mteb/abstasks/_statistics_calculation.py +21 -11
- mteb/abstasks/_stratification.py +18 -18
- mteb/abstasks/abstask.py +78 -44
- mteb/abstasks/aggregate_task_metadata.py +21 -18
- mteb/abstasks/aggregated_task.py +23 -35
- mteb/abstasks/classification.py +39 -18
- mteb/abstasks/clustering.py +37 -20
- mteb/abstasks/clustering_legacy.py +30 -16
- mteb/abstasks/image/image_text_pair_classification.py +26 -9
- mteb/abstasks/multilabel_classification.py +33 -21
- mteb/abstasks/pair_classification.py +44 -19
- mteb/abstasks/regression.py +18 -10
- mteb/abstasks/retrieval.py +82 -52
- mteb/abstasks/retrieval_dataset_loaders.py +50 -39
- mteb/abstasks/sts.py +34 -15
- mteb/abstasks/task_metadata.py +44 -37
- mteb/abstasks/text/bitext_mining.py +57 -35
- mteb/abstasks/text/reranking.py +10 -8
- mteb/abstasks/text/summarization.py +26 -10
- mteb/abstasks/zeroshot_classification.py +27 -9
- mteb/benchmarks/_create_table.py +13 -7
- mteb/benchmarks/benchmark.py +15 -3
- mteb/benchmarks/benchmarks/__init__.py +6 -0
- mteb/benchmarks/benchmarks/benchmarks.py +153 -13
- mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
- mteb/benchmarks/get_benchmark.py +14 -55
- mteb/cache.py +189 -31
- mteb/cli/_display_tasks.py +10 -4
- mteb/cli/build_cli.py +112 -13
- mteb/cli/generate_model_card.py +50 -23
- mteb/deprecated_evaluator.py +72 -54
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
- mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
- mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
- mteb/evaluate.py +71 -47
- mteb/filter_tasks.py +36 -32
- mteb/get_tasks.py +37 -33
- mteb/languages/language_scripts.py +11 -4
- mteb/leaderboard/app.py +172 -37
- mteb/leaderboard/table.py +7 -2
- mteb/load_results.py +20 -14
- mteb/models/abs_encoder.py +30 -16
- mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
- mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
- mteb/models/cache_wrappers/cache_wrapper.py +16 -11
- mteb/models/get_model_meta.py +53 -9
- mteb/models/instruct_wrapper.py +41 -13
- mteb/models/model_implementations/align_models.py +11 -5
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +6 -4
- mteb/models/model_implementations/ara_models.py +2 -1
- mteb/models/model_implementations/arctic_models.py +16 -8
- mteb/models/model_implementations/b1ade_models.py +2 -1
- mteb/models/model_implementations/bedrock_models.py +20 -6
- mteb/models/model_implementations/bge_models.py +85 -22
- mteb/models/model_implementations/bica_model.py +4 -3
- mteb/models/model_implementations/blip2_models.py +13 -6
- mteb/models/model_implementations/blip_models.py +33 -20
- mteb/models/model_implementations/bm25.py +27 -17
- mteb/models/model_implementations/bmretriever_models.py +16 -6
- mteb/models/model_implementations/cadet_models.py +2 -1
- mteb/models/model_implementations/cde_models.py +22 -9
- mteb/models/model_implementations/clip_models.py +18 -10
- mteb/models/model_implementations/clips_models.py +6 -3
- mteb/models/model_implementations/codefuse_models.py +10 -5
- mteb/models/model_implementations/codesage_models.py +6 -3
- mteb/models/model_implementations/cohere_models.py +19 -9
- mteb/models/model_implementations/cohere_v.py +16 -6
- mteb/models/model_implementations/colpali_models.py +10 -6
- mteb/models/model_implementations/colqwen_models.py +24 -38
- mteb/models/model_implementations/colsmol_models.py +5 -3
- mteb/models/model_implementations/conan_models.py +12 -5
- mteb/models/model_implementations/dino_models.py +70 -46
- mteb/models/model_implementations/e5_instruct.py +27 -4
- mteb/models/model_implementations/e5_models.py +18 -9
- mteb/models/model_implementations/e5_v.py +16 -10
- mteb/models/model_implementations/eagerworks_models.py +12 -5
- mteb/models/model_implementations/emillykkejensen_models.py +9 -6
- mteb/models/model_implementations/en_code_retriever.py +2 -1
- mteb/models/model_implementations/euler_models.py +3 -2
- mteb/models/model_implementations/evaclip_models.py +13 -4
- mteb/models/model_implementations/fa_models.py +18 -9
- mteb/models/model_implementations/facebookai.py +16 -2
- mteb/models/model_implementations/geogpt_models.py +2 -1
- mteb/models/model_implementations/gme_v_models.py +13 -8
- mteb/models/model_implementations/google_models.py +16 -5
- mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
- mteb/models/model_implementations/gritlm_models.py +5 -2
- mteb/models/model_implementations/gte_models.py +34 -13
- mteb/models/model_implementations/hinvec_models.py +7 -2
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +36 -6
- mteb/models/model_implementations/inf_models.py +4 -2
- mteb/models/model_implementations/jasper_models.py +16 -7
- mteb/models/model_implementations/jina_clip.py +58 -14
- mteb/models/model_implementations/jina_models.py +35 -16
- mteb/models/model_implementations/kalm_models.py +24 -12
- mteb/models/model_implementations/kblab.py +13 -6
- mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
- mteb/models/model_implementations/kfst.py +2 -1
- mteb/models/model_implementations/kowshik24_models.py +2 -1
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +2 -1
- mteb/models/model_implementations/linq_models.py +8 -2
- mteb/models/model_implementations/listconranker.py +11 -5
- mteb/models/model_implementations/llm2clip_models.py +18 -10
- mteb/models/model_implementations/llm2vec_models.py +28 -14
- mteb/models/model_implementations/mcinext_models.py +12 -3
- mteb/models/model_implementations/mdbr_models.py +19 -3
- mteb/models/model_implementations/misc_models.py +131 -68
- mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
- mteb/models/model_implementations/mme5_models.py +3 -2
- mteb/models/model_implementations/moco_models.py +15 -8
- mteb/models/model_implementations/mod_models.py +3 -2
- mteb/models/model_implementations/model2vec_models.py +37 -18
- mteb/models/model_implementations/moka_models.py +4 -1
- mteb/models/model_implementations/nbailab.py +6 -3
- mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
- mteb/models/model_implementations/nomic_models.py +47 -19
- mteb/models/model_implementations/nomic_models_vision.py +6 -4
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
- mteb/models/model_implementations/nvidia_models.py +165 -22
- mteb/models/model_implementations/octen_models.py +64 -3
- mteb/models/model_implementations/openai_models.py +14 -4
- mteb/models/model_implementations/openclip_models.py +30 -17
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
- mteb/models/model_implementations/ops_moa_models.py +10 -3
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
- mteb/models/model_implementations/pawan_models.py +2 -1
- mteb/models/model_implementations/piccolo_models.py +3 -1
- mteb/models/model_implementations/pixie_models.py +56 -0
- mteb/models/model_implementations/promptriever_models.py +20 -10
- mteb/models/model_implementations/pylate_models.py +41 -21
- mteb/models/model_implementations/qodo_models.py +4 -2
- mteb/models/model_implementations/qtack_models.py +2 -1
- mteb/models/model_implementations/qwen3_models.py +14 -4
- mteb/models/model_implementations/qzhou_models.py +4 -2
- mteb/models/model_implementations/random_baseline.py +7 -6
- mteb/models/model_implementations/rasgaard_models.py +3 -2
- mteb/models/model_implementations/reasonir_model.py +66 -1
- mteb/models/model_implementations/repllama_models.py +18 -9
- mteb/models/model_implementations/rerankers_custom.py +25 -10
- mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
- mteb/models/model_implementations/richinfoai_models.py +2 -1
- mteb/models/model_implementations/ru_sentence_models.py +40 -20
- mteb/models/model_implementations/ruri_models.py +20 -10
- mteb/models/model_implementations/salesforce_models.py +13 -4
- mteb/models/model_implementations/samilpwc_models.py +2 -1
- mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
- mteb/models/model_implementations/searchmap_models.py +2 -1
- mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
- mteb/models/model_implementations/seed_models.py +2 -1
- mteb/models/model_implementations/sentence_transformers_models.py +142 -22
- mteb/models/model_implementations/shuu_model.py +2 -1
- mteb/models/model_implementations/siglip_models.py +39 -24
- mteb/models/model_implementations/slm_models.py +419 -0
- mteb/models/model_implementations/sonar_models.py +2 -1
- mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
- mteb/models/model_implementations/stella_models.py +23 -4
- mteb/models/model_implementations/tarka_models.py +4 -2
- mteb/models/model_implementations/text2vec_models.py +12 -3
- mteb/models/model_implementations/ua_sentence_models.py +2 -1
- mteb/models/model_implementations/uae_models.py +17 -5
- mteb/models/model_implementations/vdr_models.py +9 -2
- mteb/models/model_implementations/vi_vn_models.py +12 -6
- mteb/models/model_implementations/vista_models.py +11 -4
- mteb/models/model_implementations/vlm2vec_models.py +14 -7
- mteb/models/model_implementations/voyage_models.py +136 -4
- mteb/models/model_implementations/voyage_v.py +17 -10
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +2 -1
- mteb/models/model_implementations/yuan_models.py +2 -1
- mteb/models/model_implementations/yuan_models_en.py +3 -2
- mteb/models/model_meta.py +127 -40
- mteb/models/models_protocols.py +43 -22
- mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
- mteb/models/search_wrappers.py +63 -29
- mteb/models/sentence_transformer_wrapper.py +52 -26
- mteb/models/vllm_wrapper.py +329 -0
- mteb/py.typed +0 -0
- mteb/results/benchmark_results.py +48 -35
- mteb/results/model_result.py +68 -32
- mteb/results/task_result.py +110 -72
- mteb/similarity_functions.py +19 -9
- mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
- mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
- mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
- mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
- mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
- mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
- mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
- mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
- mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
- mteb/tasks/classification/est/estonian_valence.py +2 -2
- mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
- mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
- mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
- mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
- mteb/tasks/classification/fra/french_book_reviews.py +2 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
- mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
- mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
- mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
- mteb/tasks/classification/jpn/wrime_classification.py +1 -1
- mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
- mteb/tasks/classification/kor/klue_tc.py +2 -2
- mteb/tasks/classification/kor/kor_fin.py +1 -1
- mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
- mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
- mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
- mteb/tasks/classification/multilingual/scala_classification.py +2 -2
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
- mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
- mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
- mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
- mteb/tasks/classification/ory/odia_news_classification.py +2 -2
- mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
- mteb/tasks/classification/ron/moroco.py +1 -1
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
- mteb/tasks/classification/rus/georeview_classification.py +1 -1
- mteb/tasks/classification/rus/headline_classification.py +2 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
- mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
- mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
- mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
- mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
- mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
- mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
- mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
- mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
- mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
- mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
- mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nob/snl_clustering.py +8 -3
- mteb/tasks/clustering/nob/vg_clustering.py +8 -3
- mteb/tasks/clustering/pol/polish_clustering.py +3 -3
- mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
- mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
- mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
- mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
- mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
- mteb/tasks/multichoice/eng/cv_bench.py +4 -4
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
- mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
- mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
- mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
- mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
- mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
- mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
- mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
- mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
- mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
- mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
- mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
- mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
- mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
- mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
- mteb/tasks/pair_classification/rus/terra.py +2 -2
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
- mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
- mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
- mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_rag.py +16 -16
- mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
- mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
- mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
- mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
- mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
- mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
- mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
- mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
- mteb/tasks/retrieval/eng/__init__.py +44 -0
- mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
- mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
- mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
- mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/ml_questions.py +1 -1
- mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
- mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
- mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
- mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
- mteb/tasks/retrieval/kor/__init__.py +15 -1
- mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
- mteb/tasks/retrieval/multilingual/__init__.py +2 -0
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
- mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
- mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
- mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nob/norquad.py +3 -3
- mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
- mteb/tasks/retrieval/vie/__init__.py +14 -6
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
- mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
- mteb/tasks/sts/fao/faroese_sts.py +1 -1
- mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
- mteb/tasks/sts/kor/klue_sts.py +1 -1
- mteb/tasks/sts/por/sick_br_sts.py +1 -1
- mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
- mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
- mteb/types/__init__.py +2 -0
- mteb/types/_encoder_io.py +13 -1
- mteb/types/_result.py +2 -1
- mteb/types/statistics.py +18 -5
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
- mteb/models/model_implementations/mxbai_models.py +0 -111
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
|
@@ -1,16 +1,15 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import hashlib
|
|
2
4
|
import logging
|
|
3
5
|
from collections import defaultdict
|
|
4
|
-
from
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
5
7
|
|
|
6
8
|
import numpy as np
|
|
7
9
|
from datasets import Dataset
|
|
8
10
|
from sklearn.metrics import average_precision_score
|
|
9
11
|
|
|
10
12
|
from mteb._evaluators import PairClassificationEvaluator
|
|
11
|
-
from mteb._evaluators.pair_classification_evaluator import (
|
|
12
|
-
PairClassificationDistances,
|
|
13
|
-
)
|
|
14
13
|
from mteb.abstasks._statistics_calculation import (
|
|
15
14
|
calculate_image_statistics,
|
|
16
15
|
calculate_label_statistics,
|
|
@@ -19,14 +18,25 @@ from mteb.abstasks._statistics_calculation import (
|
|
|
19
18
|
from mteb.abstasks.abstask import AbsTask
|
|
20
19
|
from mteb.models.model_meta import ScoringFunction
|
|
21
20
|
from mteb.models.models_protocols import EncoderProtocol
|
|
22
|
-
from mteb.types import PromptType
|
|
23
21
|
from mteb.types.statistics import (
|
|
24
|
-
ImageStatistics,
|
|
25
|
-
LabelStatistics,
|
|
26
22
|
SplitDescriptiveStatistics,
|
|
27
|
-
TextStatistics,
|
|
28
23
|
)
|
|
29
24
|
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
|
|
28
|
+
from mteb._evaluators.pair_classification_evaluator import (
|
|
29
|
+
PairClassificationDistances,
|
|
30
|
+
)
|
|
31
|
+
from mteb.models.models_protocols import MTEBModels
|
|
32
|
+
from mteb.types import EncodeKwargs, PromptType
|
|
33
|
+
from mteb.types.statistics import (
|
|
34
|
+
ImageStatistics,
|
|
35
|
+
LabelStatistics,
|
|
36
|
+
TextStatistics,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
|
|
30
40
|
logger = logging.getLogger(__name__)
|
|
31
41
|
|
|
32
42
|
|
|
@@ -44,8 +54,8 @@ class PairClassificationDescriptiveStatistics(SplitDescriptiveStatistics):
|
|
|
44
54
|
"""
|
|
45
55
|
|
|
46
56
|
num_samples: int
|
|
47
|
-
number_of_characters: int
|
|
48
|
-
unique_pairs: int
|
|
57
|
+
number_of_characters: int | None
|
|
58
|
+
unique_pairs: int | None
|
|
49
59
|
|
|
50
60
|
text1_statistics: TextStatistics | None
|
|
51
61
|
image1_statistics: ImageStatistics | None
|
|
@@ -79,15 +89,19 @@ class AbsTaskPairClassification(AbsTask):
|
|
|
79
89
|
|
|
80
90
|
def _evaluate_subset(
|
|
81
91
|
self,
|
|
82
|
-
model:
|
|
92
|
+
model: MTEBModels,
|
|
83
93
|
data_split: Dataset,
|
|
84
94
|
*,
|
|
85
95
|
hf_split: str,
|
|
86
96
|
hf_subset: str,
|
|
87
|
-
encode_kwargs:
|
|
97
|
+
encode_kwargs: EncodeKwargs,
|
|
88
98
|
prediction_folder: Path | None = None,
|
|
99
|
+
num_proc: int = 1,
|
|
89
100
|
**kwargs,
|
|
90
101
|
) -> dict[str, float]:
|
|
102
|
+
if not isinstance(model, EncoderProtocol):
|
|
103
|
+
raise TypeError("Expected model to be an instance of EncoderProtocol")
|
|
104
|
+
|
|
91
105
|
if self.metadata.modalities == ["text"]:
|
|
92
106
|
# for compatibility with v1 version where datasets were stored in a single row
|
|
93
107
|
data_split = data_split[0] if len(data_split) == 1 else data_split
|
|
@@ -102,7 +116,11 @@ class AbsTaskPairClassification(AbsTask):
|
|
|
102
116
|
input2_prompt_type=self.input2_prompt_type,
|
|
103
117
|
**kwargs,
|
|
104
118
|
)
|
|
105
|
-
similarity_scores = evaluator(
|
|
119
|
+
similarity_scores = evaluator(
|
|
120
|
+
model,
|
|
121
|
+
encode_kwargs=encode_kwargs,
|
|
122
|
+
num_proc=num_proc,
|
|
123
|
+
)
|
|
106
124
|
|
|
107
125
|
if prediction_folder:
|
|
108
126
|
self._save_task_predictions(
|
|
@@ -120,7 +138,7 @@ class AbsTaskPairClassification(AbsTask):
|
|
|
120
138
|
self, similarity_scores: PairClassificationDistances, labels: list[int]
|
|
121
139
|
) -> dict[str, float]:
|
|
122
140
|
logger.info("Computing metrics...")
|
|
123
|
-
|
|
141
|
+
np_labels = np.asarray(labels)
|
|
124
142
|
output_scores = {}
|
|
125
143
|
max_scores = defaultdict(list)
|
|
126
144
|
for short_name, scores, reverse in [
|
|
@@ -142,7 +160,7 @@ class AbsTaskPairClassification(AbsTask):
|
|
|
142
160
|
],
|
|
143
161
|
[ScoringFunction.DOT_PRODUCT.value, similarity_scores["dot_scores"], True],
|
|
144
162
|
]:
|
|
145
|
-
metrics = self._compute_metrics_values(scores,
|
|
163
|
+
metrics = self._compute_metrics_values(scores, np_labels, reverse) # type: ignore[arg-type]
|
|
146
164
|
for metric_name, metric_value in metrics.items():
|
|
147
165
|
output_scores[f"{short_name}_{metric_name}"] = metric_value
|
|
148
166
|
max_scores[metric_name].append(metric_value)
|
|
@@ -235,8 +253,14 @@ class AbsTaskPairClassification(AbsTask):
|
|
|
235
253
|
labels_statistics=calculate_label_statistics(labels),
|
|
236
254
|
)
|
|
237
255
|
|
|
238
|
-
def _push_dataset_to_hub(self, repo_name: str) -> None:
|
|
256
|
+
def _push_dataset_to_hub(self, repo_name: str, num_proc: int = 1) -> None:
|
|
239
257
|
# previously pair classification datasets were stored in a single row
|
|
258
|
+
if self.dataset is None:
|
|
259
|
+
# overall this shouldn't happen as we check for dataset before pushing to hub
|
|
260
|
+
# added here for type checking purposes
|
|
261
|
+
raise RuntimeError(
|
|
262
|
+
"Dataset not loaded. To load dataset run `task.load_data()`."
|
|
263
|
+
)
|
|
240
264
|
if self.metadata.is_multilingual:
|
|
241
265
|
for subset in self.dataset:
|
|
242
266
|
for split in self.dataset[subset]:
|
|
@@ -253,6 +277,7 @@ class AbsTaskPairClassification(AbsTask):
|
|
|
253
277
|
self.input2_column_name,
|
|
254
278
|
self.label_column_name,
|
|
255
279
|
],
|
|
280
|
+
num_proc=num_proc,
|
|
256
281
|
)
|
|
257
282
|
|
|
258
283
|
def _compute_metrics_values(
|
|
@@ -290,13 +315,13 @@ class AbsTaskPairClassification(AbsTask):
|
|
|
290
315
|
)
|
|
291
316
|
|
|
292
317
|
def _find_best_acc_and_threshold(
|
|
293
|
-
self, scores:
|
|
318
|
+
self, scores: list[float], labels: np.ndarray, high_score_more_similar: bool
|
|
294
319
|
) -> tuple[float, float]:
|
|
295
320
|
rows = list(zip(scores, labels))
|
|
296
321
|
rows = sorted(rows, key=lambda x: x[0], reverse=high_score_more_similar)
|
|
297
322
|
|
|
298
323
|
max_acc = 0
|
|
299
|
-
best_threshold = -1
|
|
324
|
+
best_threshold = -1.0
|
|
300
325
|
positive_so_far = 0
|
|
301
326
|
remaining_negatives = sum(np.array(labels) == 0)
|
|
302
327
|
|
|
@@ -323,7 +348,7 @@ class AbsTaskPairClassification(AbsTask):
|
|
|
323
348
|
|
|
324
349
|
rows = sorted(rows, key=lambda x: x[0], reverse=high_score_more_similar)
|
|
325
350
|
|
|
326
|
-
best_f1 = best_precision = best_recall = 0
|
|
351
|
+
best_f1 = best_precision = best_recall = 0.0
|
|
327
352
|
threshold = 0
|
|
328
353
|
nextract = 0
|
|
329
354
|
ncorrect = 0
|
mteb/abstasks/regression.py
CHANGED
|
@@ -1,29 +1,37 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
|
-
from typing import TypedDict
|
|
4
|
+
from typing import TYPE_CHECKING, TypedDict
|
|
3
5
|
|
|
4
6
|
import datasets
|
|
5
7
|
import numpy as np
|
|
6
8
|
import pandas as pd
|
|
7
|
-
from datasets import Dataset
|
|
8
9
|
from scipy.stats import kendalltau
|
|
9
10
|
from sklearn.linear_model import LinearRegression
|
|
10
11
|
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
|
|
11
12
|
|
|
12
|
-
from mteb._evaluators.sklearn_evaluator import SklearnEvaluator
|
|
13
|
+
from mteb._evaluators.sklearn_evaluator import SklearnEvaluator
|
|
13
14
|
from mteb.abstasks._statistics_calculation import (
|
|
14
15
|
calculate_image_statistics,
|
|
15
16
|
calculate_score_statistics,
|
|
16
17
|
calculate_text_statistics,
|
|
17
18
|
)
|
|
18
19
|
from mteb.types.statistics import (
|
|
19
|
-
ImageStatistics,
|
|
20
|
-
ScoreStatistics,
|
|
21
20
|
SplitDescriptiveStatistics,
|
|
22
|
-
TextStatistics,
|
|
23
21
|
)
|
|
24
22
|
|
|
25
23
|
from .classification import AbsTaskClassification
|
|
26
24
|
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
from datasets import Dataset
|
|
27
|
+
|
|
28
|
+
from mteb._evaluators.sklearn_evaluator import SklearnModelProtocol
|
|
29
|
+
from mteb.types.statistics import (
|
|
30
|
+
ImageStatistics,
|
|
31
|
+
ScoreStatistics,
|
|
32
|
+
TextStatistics,
|
|
33
|
+
)
|
|
34
|
+
|
|
27
35
|
logger = logging.getLogger(__name__)
|
|
28
36
|
|
|
29
37
|
|
|
@@ -84,10 +92,10 @@ class AbsTaskRegression(AbsTaskClassification):
|
|
|
84
92
|
n_samples: Number of samples to use for training the regression model. If the dataset has fewer samples than n_samples, all samples are used.
|
|
85
93
|
abstask_prompt: Prompt to use for the task for instruction models if no prompt is provided in TaskMetadata.prompt.
|
|
86
94
|
evaluator_model: The model to use for evaluation. Can be any sklearn compatible model. Default is `LinearRegression`.
|
|
87
|
-
|
|
95
|
+
|
|
88
96
|
"""
|
|
89
97
|
|
|
90
|
-
evaluator: type[
|
|
98
|
+
evaluator: type[SklearnEvaluator] = SklearnEvaluator
|
|
91
99
|
evaluator_model: SklearnModelProtocol = LinearRegression(n_jobs=-1)
|
|
92
100
|
|
|
93
101
|
train_split: str = "train"
|
|
@@ -113,7 +121,7 @@ class AbsTaskRegression(AbsTaskClassification):
|
|
|
113
121
|
)["train"]
|
|
114
122
|
return train_split_sampled, []
|
|
115
123
|
|
|
116
|
-
def _calculate_scores(
|
|
124
|
+
def _calculate_scores( # type: ignore[override]
|
|
117
125
|
self,
|
|
118
126
|
y_test: np.ndarray | list[int],
|
|
119
127
|
y_pred: np.ndarray,
|
|
@@ -183,7 +191,7 @@ class AbsTaskRegression(AbsTaskClassification):
|
|
|
183
191
|
|
|
184
192
|
return dataset_dict
|
|
185
193
|
|
|
186
|
-
def _calculate_descriptive_statistics_from_split(
|
|
194
|
+
def _calculate_descriptive_statistics_from_split( # type: ignore[override]
|
|
187
195
|
self, split: str, hf_subset: str | None = None, compute_overall: bool = False
|
|
188
196
|
) -> RegressionDescriptiveStatistics:
|
|
189
197
|
train_text = []
|
mteb/abstasks/retrieval.py
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import json
|
|
2
4
|
import logging
|
|
3
5
|
from collections import defaultdict
|
|
4
|
-
from collections.abc import Callable, Sequence
|
|
5
6
|
from pathlib import Path
|
|
6
7
|
from time import time
|
|
7
|
-
from typing import Any, Literal
|
|
8
|
+
from typing import TYPE_CHECKING, Any, Literal
|
|
8
9
|
|
|
9
10
|
from datasets import Dataset, DatasetDict, concatenate_datasets
|
|
10
|
-
from typing_extensions import Self
|
|
11
11
|
|
|
12
12
|
from mteb._create_dataloaders import (
|
|
13
13
|
_combine_queries_with_instruction_text,
|
|
@@ -19,24 +19,12 @@ from mteb._evaluators.retrieval_metrics import make_score_dict
|
|
|
19
19
|
from mteb.models import (
|
|
20
20
|
CrossEncoderProtocol,
|
|
21
21
|
EncoderProtocol,
|
|
22
|
-
MTEBModels,
|
|
23
22
|
SearchCrossEncoderWrapper,
|
|
24
23
|
SearchEncoderWrapper,
|
|
25
24
|
SearchProtocol,
|
|
26
25
|
)
|
|
27
|
-
from mteb.types import (
|
|
28
|
-
HFSubset,
|
|
29
|
-
QueryDatasetType,
|
|
30
|
-
RelevantDocumentsType,
|
|
31
|
-
RetrievalOutputType,
|
|
32
|
-
ScoresDict,
|
|
33
|
-
)
|
|
34
26
|
from mteb.types.statistics import (
|
|
35
|
-
ImageStatistics,
|
|
36
|
-
RelevantDocsStatistics,
|
|
37
27
|
SplitDescriptiveStatistics,
|
|
38
|
-
TextStatistics,
|
|
39
|
-
TopRankedStatistics,
|
|
40
28
|
)
|
|
41
29
|
|
|
42
30
|
from ._statistics_calculation import (
|
|
@@ -52,6 +40,30 @@ from .retrieval_dataset_loaders import (
|
|
|
52
40
|
_combine_queries_with_instructions_datasets,
|
|
53
41
|
)
|
|
54
42
|
|
|
43
|
+
if TYPE_CHECKING:
|
|
44
|
+
from collections.abc import Callable, Mapping, Sequence
|
|
45
|
+
|
|
46
|
+
from typing_extensions import Self
|
|
47
|
+
|
|
48
|
+
from mteb.models import (
|
|
49
|
+
MTEBModels,
|
|
50
|
+
)
|
|
51
|
+
from mteb.types import (
|
|
52
|
+
EncodeKwargs,
|
|
53
|
+
HFSubset,
|
|
54
|
+
QueryDatasetType,
|
|
55
|
+
RelevantDocumentsType,
|
|
56
|
+
RetrievalOutputType,
|
|
57
|
+
ScoresDict,
|
|
58
|
+
)
|
|
59
|
+
from mteb.types.statistics import (
|
|
60
|
+
ImageStatistics,
|
|
61
|
+
RelevantDocsStatistics,
|
|
62
|
+
TextStatistics,
|
|
63
|
+
TopRankedStatistics,
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
|
|
55
67
|
logger = logging.getLogger(__name__)
|
|
56
68
|
|
|
57
69
|
|
|
@@ -136,7 +148,7 @@ class AbsTaskRetrieval(AbsTask):
|
|
|
136
148
|
)
|
|
137
149
|
)
|
|
138
150
|
|
|
139
|
-
def convert_v1_dataset_format_to_v2(self):
|
|
151
|
+
def convert_v1_dataset_format_to_v2(self, num_proc: int) -> None:
|
|
140
152
|
"""Convert dataset from v1 format (`self.queries`, `self.corpus`) to v2 format (`self.dataset`)."""
|
|
141
153
|
# check if dataset is `v1` version
|
|
142
154
|
if not hasattr(self, "queries"):
|
|
@@ -184,17 +196,17 @@ class AbsTaskRetrieval(AbsTask):
|
|
|
184
196
|
return queries, corpus
|
|
185
197
|
|
|
186
198
|
if self.metadata.is_multilingual:
|
|
187
|
-
for subset in self.queries:
|
|
188
|
-
for split in self.queries[subset]:
|
|
189
|
-
queries = self.queries[subset][split]
|
|
190
|
-
corpus = self.corpus[subset][split]
|
|
199
|
+
for subset in self.queries: # type: ignore[attr-defined]
|
|
200
|
+
for split in self.queries[subset]: # type: ignore[attr-defined]
|
|
201
|
+
queries = self.queries[subset][split] # type: ignore[attr-defined]
|
|
202
|
+
corpus = self.corpus[subset][split] # type: ignore[attr-defined]
|
|
191
203
|
|
|
192
204
|
(
|
|
193
205
|
self.dataset[subset][split]["queries"],
|
|
194
206
|
self.dataset[subset][split]["corpus"],
|
|
195
207
|
) = _process_split(queries, corpus)
|
|
196
208
|
|
|
197
|
-
self.dataset[subset][split]["relevant_docs"] = self.relevant_docs[
|
|
209
|
+
self.dataset[subset][split]["relevant_docs"] = self.relevant_docs[ # type: ignore[attr-defined]
|
|
198
210
|
subset
|
|
199
211
|
][split]
|
|
200
212
|
if hasattr(self, "instructions"):
|
|
@@ -203,6 +215,7 @@ class AbsTaskRetrieval(AbsTask):
|
|
|
203
215
|
_combine_queries_with_instructions_datasets(
|
|
204
216
|
self.dataset[subset][split]["queries"],
|
|
205
217
|
instructions,
|
|
218
|
+
num_proc,
|
|
206
219
|
)
|
|
207
220
|
)
|
|
208
221
|
if hasattr(self, "top_ranked"):
|
|
@@ -211,15 +224,15 @@ class AbsTaskRetrieval(AbsTask):
|
|
|
211
224
|
][split]
|
|
212
225
|
else:
|
|
213
226
|
subset = "default"
|
|
214
|
-
for split in self.queries:
|
|
215
|
-
queries = self.queries[split]
|
|
216
|
-
corpus = self.corpus[split]
|
|
227
|
+
for split in self.queries: # type: ignore[attr-defined]
|
|
228
|
+
queries = self.queries[split] # type: ignore[attr-defined]
|
|
229
|
+
corpus = self.corpus[split] # type: ignore[attr-defined]
|
|
217
230
|
(
|
|
218
231
|
self.dataset[subset][split]["queries"],
|
|
219
232
|
self.dataset[subset][split]["corpus"],
|
|
220
233
|
) = _process_split(queries, corpus)
|
|
221
234
|
|
|
222
|
-
self.dataset[subset][split]["relevant_docs"] = self.relevant_docs[
|
|
235
|
+
self.dataset[subset][split]["relevant_docs"] = self.relevant_docs[ # type: ignore[attr-defined]
|
|
223
236
|
split
|
|
224
237
|
].copy()
|
|
225
238
|
if hasattr(self, "instructions"):
|
|
@@ -228,28 +241,29 @@ class AbsTaskRetrieval(AbsTask):
|
|
|
228
241
|
_combine_queries_with_instructions_datasets(
|
|
229
242
|
self.dataset[subset][split]["queries"],
|
|
230
243
|
instructions,
|
|
244
|
+
num_proc,
|
|
231
245
|
)
|
|
232
246
|
)
|
|
233
|
-
if hasattr(self, "top_ranked"):
|
|
247
|
+
if hasattr(self, "top_ranked") and self.top_ranked:
|
|
234
248
|
self.dataset[subset][split]["top_ranked"] = self.top_ranked[
|
|
235
249
|
split
|
|
236
250
|
].copy()
|
|
237
251
|
|
|
238
|
-
del self.queries
|
|
239
|
-
del self.corpus
|
|
240
|
-
del self.relevant_docs
|
|
252
|
+
del self.queries # type: ignore[attr-defined]
|
|
253
|
+
del self.corpus # type: ignore[attr-defined]
|
|
254
|
+
del self.relevant_docs # type: ignore[attr-defined]
|
|
241
255
|
if hasattr(self, "instructions"):
|
|
242
256
|
del self.instructions
|
|
243
257
|
if hasattr(self, "top_ranked"):
|
|
244
258
|
del self.top_ranked
|
|
245
259
|
|
|
246
|
-
def load_data(self) -> None:
|
|
260
|
+
def load_data(self, num_proc: int = 1, **kwargs) -> None:
|
|
247
261
|
"""Load the dataset for the retrieval task."""
|
|
248
262
|
if self.data_loaded:
|
|
249
263
|
return
|
|
250
264
|
|
|
251
265
|
dataset_path = self.metadata.dataset["path"]
|
|
252
|
-
eval_splits = self.
|
|
266
|
+
eval_splits = self.eval_splits
|
|
253
267
|
trust_remote_code = self.metadata.dataset.get("trust_remote_code", False)
|
|
254
268
|
revision = self.metadata.dataset["revision"]
|
|
255
269
|
|
|
@@ -265,16 +279,18 @@ class AbsTaskRetrieval(AbsTask):
|
|
|
265
279
|
trust_remote_code=trust_remote_code,
|
|
266
280
|
split=split,
|
|
267
281
|
config=hf_subset,
|
|
268
|
-
).load(
|
|
282
|
+
).load(
|
|
283
|
+
num_proc=num_proc,
|
|
284
|
+
)
|
|
269
285
|
|
|
270
286
|
if self.metadata.is_multilingual:
|
|
271
|
-
for lang in self.
|
|
287
|
+
for lang in self.hf_subsets:
|
|
272
288
|
for split in eval_splits:
|
|
273
289
|
_process_data(split, lang)
|
|
274
290
|
else:
|
|
275
291
|
for split in eval_splits:
|
|
276
292
|
_process_data(split)
|
|
277
|
-
self.dataset_transform()
|
|
293
|
+
self.dataset_transform(num_proc=num_proc)
|
|
278
294
|
self.data_loaded = True
|
|
279
295
|
|
|
280
296
|
def evaluate(
|
|
@@ -283,10 +299,11 @@ class AbsTaskRetrieval(AbsTask):
|
|
|
283
299
|
split: str = "test",
|
|
284
300
|
subsets_to_run: list[HFSubset] | None = None,
|
|
285
301
|
*,
|
|
286
|
-
encode_kwargs:
|
|
302
|
+
encode_kwargs: EncodeKwargs,
|
|
287
303
|
prediction_folder: Path | None = None,
|
|
288
|
-
|
|
289
|
-
|
|
304
|
+
num_proc: int = 1,
|
|
305
|
+
**kwargs: Any,
|
|
306
|
+
) -> Mapping[HFSubset, ScoresDict]:
|
|
290
307
|
"""Evaluate the model on the retrieval task.
|
|
291
308
|
|
|
292
309
|
Args:
|
|
@@ -296,16 +313,16 @@ class AbsTaskRetrieval(AbsTask):
|
|
|
296
313
|
subsets_to_run: Optional list of subsets to evaluate on
|
|
297
314
|
encode_kwargs: Keyword arguments passed to the encoder
|
|
298
315
|
prediction_folder: Folder to save model predictions
|
|
316
|
+
num_proc: Number of processes to use
|
|
299
317
|
**kwargs: Additional keyword arguments passed to the evaluator
|
|
300
318
|
|
|
301
|
-
|
|
302
319
|
Returns:
|
|
303
320
|
Dictionary mapping subsets to their evaluation scores
|
|
304
321
|
"""
|
|
305
322
|
if not self.data_loaded:
|
|
306
|
-
self.load_data()
|
|
323
|
+
self.load_data(num_proc=num_proc)
|
|
307
324
|
# TODO: convert all tasks directly https://github.com/embeddings-benchmark/mteb/issues/2030
|
|
308
|
-
self.convert_v1_dataset_format_to_v2()
|
|
325
|
+
self.convert_v1_dataset_format_to_v2(num_proc=num_proc)
|
|
309
326
|
|
|
310
327
|
return super().evaluate(
|
|
311
328
|
model,
|
|
@@ -313,6 +330,7 @@ class AbsTaskRetrieval(AbsTask):
|
|
|
313
330
|
subsets_to_run,
|
|
314
331
|
encode_kwargs=encode_kwargs,
|
|
315
332
|
prediction_folder=prediction_folder,
|
|
333
|
+
num_proc=num_proc,
|
|
316
334
|
**kwargs,
|
|
317
335
|
)
|
|
318
336
|
|
|
@@ -320,10 +338,11 @@ class AbsTaskRetrieval(AbsTask):
|
|
|
320
338
|
self,
|
|
321
339
|
model: MTEBModels,
|
|
322
340
|
data_split: RetrievalSplitData,
|
|
323
|
-
encode_kwargs:
|
|
341
|
+
encode_kwargs: EncodeKwargs,
|
|
324
342
|
hf_split: str,
|
|
325
343
|
hf_subset: str,
|
|
326
344
|
prediction_folder: Path | None = None,
|
|
345
|
+
num_proc: int = 1,
|
|
327
346
|
**kwargs,
|
|
328
347
|
) -> ScoresDict:
|
|
329
348
|
"""Evaluate a model on a specific subset of the data.
|
|
@@ -335,6 +354,7 @@ class AbsTaskRetrieval(AbsTask):
|
|
|
335
354
|
hf_split: Split to evaluate on
|
|
336
355
|
hf_subset: Subset to evaluate on
|
|
337
356
|
prediction_folder: Folder to save model predictions
|
|
357
|
+
num_proc: Number of processes to use
|
|
338
358
|
**kwargs: Additional keyword arguments passed to the evaluator
|
|
339
359
|
|
|
340
360
|
Returns:
|
|
@@ -357,6 +377,8 @@ class AbsTaskRetrieval(AbsTask):
|
|
|
357
377
|
**kwargs,
|
|
358
378
|
)
|
|
359
379
|
|
|
380
|
+
search_model: SearchProtocol
|
|
381
|
+
|
|
360
382
|
if isinstance(model, EncoderProtocol) and not isinstance(model, SearchProtocol):
|
|
361
383
|
search_model = SearchEncoderWrapper(model)
|
|
362
384
|
elif isinstance(model, CrossEncoderProtocol):
|
|
@@ -372,6 +394,7 @@ class AbsTaskRetrieval(AbsTask):
|
|
|
372
394
|
results = retriever(
|
|
373
395
|
search_model,
|
|
374
396
|
encode_kwargs=encode_kwargs,
|
|
397
|
+
num_proc=num_proc,
|
|
375
398
|
)
|
|
376
399
|
end_time = time()
|
|
377
400
|
logger.debug(
|
|
@@ -446,9 +469,13 @@ class AbsTaskRetrieval(AbsTask):
|
|
|
446
469
|
return {}
|
|
447
470
|
|
|
448
471
|
def _calculate_descriptive_statistics_from_split(
|
|
449
|
-
self,
|
|
472
|
+
self,
|
|
473
|
+
split: str,
|
|
474
|
+
hf_subset: str | None = None,
|
|
475
|
+
compute_overall: bool = False,
|
|
476
|
+
num_proc: int = 1,
|
|
450
477
|
) -> RetrievalDescriptiveStatistics:
|
|
451
|
-
self.convert_v1_dataset_format_to_v2()
|
|
478
|
+
self.convert_v1_dataset_format_to_v2(num_proc)
|
|
452
479
|
if hf_subset and hf_subset in self.dataset:
|
|
453
480
|
split_data = self.dataset[hf_subset][split]
|
|
454
481
|
queries = split_data["queries"]
|
|
@@ -553,8 +580,8 @@ class AbsTaskRetrieval(AbsTask):
|
|
|
553
580
|
top_ranked_statistics=top_ranked_statistics,
|
|
554
581
|
)
|
|
555
582
|
|
|
556
|
-
def _push_dataset_to_hub(self, repo_name: str) -> None:
|
|
557
|
-
self.convert_v1_dataset_format_to_v2()
|
|
583
|
+
def _push_dataset_to_hub(self, repo_name: str, num_proc: int = 1) -> None:
|
|
584
|
+
self.convert_v1_dataset_format_to_v2(num_proc)
|
|
558
585
|
|
|
559
586
|
def _push_section(
|
|
560
587
|
data: dict[str, RetrievalSplitData],
|
|
@@ -578,11 +605,12 @@ class AbsTaskRetrieval(AbsTask):
|
|
|
578
605
|
if isinstance(data[split][subset_item], Dataset):
|
|
579
606
|
sections[split] = data[split][subset_item]
|
|
580
607
|
elif converter is not None:
|
|
608
|
+
subset_data = data[split][subset_item]
|
|
609
|
+
if subset_data is None:
|
|
610
|
+
continue
|
|
611
|
+
|
|
581
612
|
sections[split] = Dataset.from_list(
|
|
582
|
-
[
|
|
583
|
-
converter(idx, item)
|
|
584
|
-
for idx, item in data[split][subset_item].items()
|
|
585
|
-
]
|
|
613
|
+
[converter(idx, item) for idx, item in subset_data.items()]
|
|
586
614
|
)
|
|
587
615
|
else:
|
|
588
616
|
raise ValueError(
|
|
@@ -593,6 +621,7 @@ class AbsTaskRetrieval(AbsTask):
|
|
|
593
621
|
repo_name,
|
|
594
622
|
hf_subset_name,
|
|
595
623
|
commit_message=f"Add {hf_subset_name}-{subset_item}",
|
|
624
|
+
num_proc=num_proc,
|
|
596
625
|
)
|
|
597
626
|
|
|
598
627
|
for subset in self.dataset:
|
|
@@ -626,6 +655,7 @@ class AbsTaskRetrieval(AbsTask):
|
|
|
626
655
|
repo_name,
|
|
627
656
|
f"{subset}-qrels" if subset != "default" else "qrels",
|
|
628
657
|
commit_message=f"Add {subset}-qrels",
|
|
658
|
+
num_proc=num_proc,
|
|
629
659
|
)
|
|
630
660
|
|
|
631
661
|
_push_section(
|
|
@@ -680,7 +710,7 @@ class AbsTaskRetrieval(AbsTask):
|
|
|
680
710
|
|
|
681
711
|
top_k_sorted = defaultdict(list)
|
|
682
712
|
for query_id, values in top_ranked.items():
|
|
683
|
-
sorted_keys = sorted(values, key=values
|
|
713
|
+
sorted_keys = sorted(values, key=lambda k: values[k], reverse=True)
|
|
684
714
|
top_k_sorted[query_id] = sorted_keys[: self._top_k]
|
|
685
715
|
|
|
686
716
|
self.dataset[subset][split]["top_ranked"] = top_k_sorted
|
|
@@ -688,10 +718,10 @@ class AbsTaskRetrieval(AbsTask):
|
|
|
688
718
|
|
|
689
719
|
|
|
690
720
|
def _process_relevant_docs(
|
|
691
|
-
collection:
|
|
721
|
+
collection: Mapping[str, Mapping[str, int]],
|
|
692
722
|
hf_subset: str,
|
|
693
723
|
split: str,
|
|
694
|
-
) -> dict[str, dict[str,
|
|
724
|
+
) -> dict[str, dict[str, int]]:
|
|
695
725
|
"""Collections can contain overlapping ids in different splits. Prepend split and subset to avoid this
|
|
696
726
|
|
|
697
727
|
Returns:
|