mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/__init__.py +2 -0
- mteb/_create_dataloaders.py +78 -30
- mteb/_evaluators/any_sts_evaluator.py +13 -6
- mteb/_evaluators/clustering_evaluator.py +13 -5
- mteb/_evaluators/evaluator.py +12 -4
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
- mteb/_evaluators/pair_classification_evaluator.py +17 -7
- mteb/_evaluators/retrieval_evaluator.py +23 -14
- mteb/_evaluators/retrieval_metrics.py +26 -19
- mteb/_evaluators/sklearn_evaluator.py +27 -17
- mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
- mteb/_evaluators/text/summarization_evaluator.py +31 -20
- mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
- mteb/_helpful_enum.py +5 -1
- mteb/abstasks/_data_filter/filters.py +9 -3
- mteb/abstasks/_data_filter/task_pipelines.py +10 -2
- mteb/abstasks/_statistics_calculation.py +21 -11
- mteb/abstasks/_stratification.py +18 -18
- mteb/abstasks/abstask.py +78 -44
- mteb/abstasks/aggregate_task_metadata.py +21 -18
- mteb/abstasks/aggregated_task.py +23 -35
- mteb/abstasks/classification.py +39 -18
- mteb/abstasks/clustering.py +37 -20
- mteb/abstasks/clustering_legacy.py +30 -16
- mteb/abstasks/image/image_text_pair_classification.py +26 -9
- mteb/abstasks/multilabel_classification.py +33 -21
- mteb/abstasks/pair_classification.py +44 -19
- mteb/abstasks/regression.py +18 -10
- mteb/abstasks/retrieval.py +82 -52
- mteb/abstasks/retrieval_dataset_loaders.py +50 -39
- mteb/abstasks/sts.py +34 -15
- mteb/abstasks/task_metadata.py +44 -37
- mteb/abstasks/text/bitext_mining.py +57 -35
- mteb/abstasks/text/reranking.py +10 -8
- mteb/abstasks/text/summarization.py +26 -10
- mteb/abstasks/zeroshot_classification.py +27 -9
- mteb/benchmarks/_create_table.py +13 -7
- mteb/benchmarks/benchmark.py +15 -3
- mteb/benchmarks/benchmarks/__init__.py +6 -0
- mteb/benchmarks/benchmarks/benchmarks.py +153 -13
- mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
- mteb/benchmarks/get_benchmark.py +14 -55
- mteb/cache.py +189 -31
- mteb/cli/_display_tasks.py +10 -4
- mteb/cli/build_cli.py +112 -13
- mteb/cli/generate_model_card.py +50 -23
- mteb/deprecated_evaluator.py +72 -54
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
- mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
- mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
- mteb/evaluate.py +71 -47
- mteb/filter_tasks.py +36 -32
- mteb/get_tasks.py +37 -33
- mteb/languages/language_scripts.py +11 -4
- mteb/leaderboard/app.py +172 -37
- mteb/leaderboard/table.py +7 -2
- mteb/load_results.py +20 -14
- mteb/models/abs_encoder.py +30 -16
- mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
- mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
- mteb/models/cache_wrappers/cache_wrapper.py +16 -11
- mteb/models/get_model_meta.py +53 -9
- mteb/models/instruct_wrapper.py +41 -13
- mteb/models/model_implementations/align_models.py +11 -5
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +6 -4
- mteb/models/model_implementations/ara_models.py +2 -1
- mteb/models/model_implementations/arctic_models.py +16 -8
- mteb/models/model_implementations/b1ade_models.py +2 -1
- mteb/models/model_implementations/bedrock_models.py +20 -6
- mteb/models/model_implementations/bge_models.py +85 -22
- mteb/models/model_implementations/bica_model.py +4 -3
- mteb/models/model_implementations/blip2_models.py +13 -6
- mteb/models/model_implementations/blip_models.py +33 -20
- mteb/models/model_implementations/bm25.py +27 -17
- mteb/models/model_implementations/bmretriever_models.py +16 -6
- mteb/models/model_implementations/cadet_models.py +2 -1
- mteb/models/model_implementations/cde_models.py +22 -9
- mteb/models/model_implementations/clip_models.py +18 -10
- mteb/models/model_implementations/clips_models.py +6 -3
- mteb/models/model_implementations/codefuse_models.py +10 -5
- mteb/models/model_implementations/codesage_models.py +6 -3
- mteb/models/model_implementations/cohere_models.py +19 -9
- mteb/models/model_implementations/cohere_v.py +16 -6
- mteb/models/model_implementations/colpali_models.py +10 -6
- mteb/models/model_implementations/colqwen_models.py +24 -38
- mteb/models/model_implementations/colsmol_models.py +5 -3
- mteb/models/model_implementations/conan_models.py +12 -5
- mteb/models/model_implementations/dino_models.py +70 -46
- mteb/models/model_implementations/e5_instruct.py +27 -4
- mteb/models/model_implementations/e5_models.py +18 -9
- mteb/models/model_implementations/e5_v.py +16 -10
- mteb/models/model_implementations/eagerworks_models.py +12 -5
- mteb/models/model_implementations/emillykkejensen_models.py +9 -6
- mteb/models/model_implementations/en_code_retriever.py +2 -1
- mteb/models/model_implementations/euler_models.py +3 -2
- mteb/models/model_implementations/evaclip_models.py +13 -4
- mteb/models/model_implementations/fa_models.py +18 -9
- mteb/models/model_implementations/facebookai.py +16 -2
- mteb/models/model_implementations/geogpt_models.py +2 -1
- mteb/models/model_implementations/gme_v_models.py +13 -8
- mteb/models/model_implementations/google_models.py +16 -5
- mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
- mteb/models/model_implementations/gritlm_models.py +5 -2
- mteb/models/model_implementations/gte_models.py +34 -13
- mteb/models/model_implementations/hinvec_models.py +7 -2
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +36 -6
- mteb/models/model_implementations/inf_models.py +4 -2
- mteb/models/model_implementations/jasper_models.py +16 -7
- mteb/models/model_implementations/jina_clip.py +58 -14
- mteb/models/model_implementations/jina_models.py +35 -16
- mteb/models/model_implementations/kalm_models.py +24 -12
- mteb/models/model_implementations/kblab.py +13 -6
- mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
- mteb/models/model_implementations/kfst.py +2 -1
- mteb/models/model_implementations/kowshik24_models.py +2 -1
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +2 -1
- mteb/models/model_implementations/linq_models.py +8 -2
- mteb/models/model_implementations/listconranker.py +11 -5
- mteb/models/model_implementations/llm2clip_models.py +18 -10
- mteb/models/model_implementations/llm2vec_models.py +28 -14
- mteb/models/model_implementations/mcinext_models.py +12 -3
- mteb/models/model_implementations/mdbr_models.py +19 -3
- mteb/models/model_implementations/misc_models.py +131 -68
- mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
- mteb/models/model_implementations/mme5_models.py +3 -2
- mteb/models/model_implementations/moco_models.py +15 -8
- mteb/models/model_implementations/mod_models.py +3 -2
- mteb/models/model_implementations/model2vec_models.py +37 -18
- mteb/models/model_implementations/moka_models.py +4 -1
- mteb/models/model_implementations/nbailab.py +6 -3
- mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
- mteb/models/model_implementations/nomic_models.py +47 -19
- mteb/models/model_implementations/nomic_models_vision.py +6 -4
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
- mteb/models/model_implementations/nvidia_models.py +165 -22
- mteb/models/model_implementations/octen_models.py +64 -3
- mteb/models/model_implementations/openai_models.py +14 -4
- mteb/models/model_implementations/openclip_models.py +30 -17
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
- mteb/models/model_implementations/ops_moa_models.py +10 -3
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
- mteb/models/model_implementations/pawan_models.py +2 -1
- mteb/models/model_implementations/piccolo_models.py +3 -1
- mteb/models/model_implementations/pixie_models.py +56 -0
- mteb/models/model_implementations/promptriever_models.py +20 -10
- mteb/models/model_implementations/pylate_models.py +41 -21
- mteb/models/model_implementations/qodo_models.py +4 -2
- mteb/models/model_implementations/qtack_models.py +2 -1
- mteb/models/model_implementations/qwen3_models.py +14 -4
- mteb/models/model_implementations/qzhou_models.py +4 -2
- mteb/models/model_implementations/random_baseline.py +7 -6
- mteb/models/model_implementations/rasgaard_models.py +3 -2
- mteb/models/model_implementations/reasonir_model.py +66 -1
- mteb/models/model_implementations/repllama_models.py +18 -9
- mteb/models/model_implementations/rerankers_custom.py +25 -10
- mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
- mteb/models/model_implementations/richinfoai_models.py +2 -1
- mteb/models/model_implementations/ru_sentence_models.py +40 -20
- mteb/models/model_implementations/ruri_models.py +20 -10
- mteb/models/model_implementations/salesforce_models.py +13 -4
- mteb/models/model_implementations/samilpwc_models.py +2 -1
- mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
- mteb/models/model_implementations/searchmap_models.py +2 -1
- mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
- mteb/models/model_implementations/seed_models.py +2 -1
- mteb/models/model_implementations/sentence_transformers_models.py +142 -22
- mteb/models/model_implementations/shuu_model.py +2 -1
- mteb/models/model_implementations/siglip_models.py +39 -24
- mteb/models/model_implementations/slm_models.py +419 -0
- mteb/models/model_implementations/sonar_models.py +2 -1
- mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
- mteb/models/model_implementations/stella_models.py +23 -4
- mteb/models/model_implementations/tarka_models.py +4 -2
- mteb/models/model_implementations/text2vec_models.py +12 -3
- mteb/models/model_implementations/ua_sentence_models.py +2 -1
- mteb/models/model_implementations/uae_models.py +17 -5
- mteb/models/model_implementations/vdr_models.py +9 -2
- mteb/models/model_implementations/vi_vn_models.py +12 -6
- mteb/models/model_implementations/vista_models.py +11 -4
- mteb/models/model_implementations/vlm2vec_models.py +14 -7
- mteb/models/model_implementations/voyage_models.py +136 -4
- mteb/models/model_implementations/voyage_v.py +17 -10
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +2 -1
- mteb/models/model_implementations/yuan_models.py +2 -1
- mteb/models/model_implementations/yuan_models_en.py +3 -2
- mteb/models/model_meta.py +127 -40
- mteb/models/models_protocols.py +43 -22
- mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
- mteb/models/search_wrappers.py +63 -29
- mteb/models/sentence_transformer_wrapper.py +52 -26
- mteb/models/vllm_wrapper.py +329 -0
- mteb/py.typed +0 -0
- mteb/results/benchmark_results.py +48 -35
- mteb/results/model_result.py +68 -32
- mteb/results/task_result.py +110 -72
- mteb/similarity_functions.py +19 -9
- mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
- mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
- mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
- mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
- mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
- mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
- mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
- mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
- mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
- mteb/tasks/classification/est/estonian_valence.py +2 -2
- mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
- mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
- mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
- mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
- mteb/tasks/classification/fra/french_book_reviews.py +2 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
- mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
- mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
- mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
- mteb/tasks/classification/jpn/wrime_classification.py +1 -1
- mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
- mteb/tasks/classification/kor/klue_tc.py +2 -2
- mteb/tasks/classification/kor/kor_fin.py +1 -1
- mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
- mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
- mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
- mteb/tasks/classification/multilingual/scala_classification.py +2 -2
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
- mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
- mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
- mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
- mteb/tasks/classification/ory/odia_news_classification.py +2 -2
- mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
- mteb/tasks/classification/ron/moroco.py +1 -1
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
- mteb/tasks/classification/rus/georeview_classification.py +1 -1
- mteb/tasks/classification/rus/headline_classification.py +2 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
- mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
- mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
- mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
- mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
- mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
- mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
- mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
- mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
- mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
- mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
- mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nob/snl_clustering.py +8 -3
- mteb/tasks/clustering/nob/vg_clustering.py +8 -3
- mteb/tasks/clustering/pol/polish_clustering.py +3 -3
- mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
- mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
- mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
- mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
- mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
- mteb/tasks/multichoice/eng/cv_bench.py +4 -4
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
- mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
- mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
- mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
- mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
- mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
- mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
- mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
- mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
- mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
- mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
- mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
- mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
- mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
- mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
- mteb/tasks/pair_classification/rus/terra.py +2 -2
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
- mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
- mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
- mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_rag.py +16 -16
- mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
- mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
- mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
- mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
- mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
- mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
- mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
- mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
- mteb/tasks/retrieval/eng/__init__.py +44 -0
- mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
- mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
- mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
- mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/ml_questions.py +1 -1
- mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
- mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
- mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
- mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
- mteb/tasks/retrieval/kor/__init__.py +15 -1
- mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
- mteb/tasks/retrieval/multilingual/__init__.py +2 -0
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
- mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
- mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
- mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nob/norquad.py +3 -3
- mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
- mteb/tasks/retrieval/vie/__init__.py +14 -6
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
- mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
- mteb/tasks/sts/fao/faroese_sts.py +1 -1
- mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
- mteb/tasks/sts/kor/klue_sts.py +1 -1
- mteb/tasks/sts/por/sick_br_sts.py +1 -1
- mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
- mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
- mteb/types/__init__.py +2 -0
- mteb/types/_encoder_io.py +13 -1
- mteb/types/_result.py +2 -1
- mteb/types/statistics.py +18 -5
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
- mteb/models/model_implementations/mxbai_models.py +0 -111
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
mteb/__init__.py
CHANGED
|
@@ -3,6 +3,7 @@ from importlib.metadata import version
|
|
|
3
3
|
from mteb import types
|
|
4
4
|
from mteb.abstasks import AbsTask
|
|
5
5
|
from mteb.abstasks.task_metadata import TaskMetadata
|
|
6
|
+
from mteb.cache import ResultCache
|
|
6
7
|
from mteb.deprecated_evaluator import MTEB
|
|
7
8
|
from mteb.evaluate import evaluate
|
|
8
9
|
from mteb.filter_tasks import filter_tasks
|
|
@@ -33,6 +34,7 @@ __all__ = [
|
|
|
33
34
|
"CrossEncoderProtocol",
|
|
34
35
|
"EncoderProtocol",
|
|
35
36
|
"IndexEncoderSearchProtocol",
|
|
37
|
+
"ResultCache",
|
|
36
38
|
"SearchProtocol",
|
|
37
39
|
"SentenceTransformerEncoderWrapper",
|
|
38
40
|
"TaskMetadata",
|
mteb/_create_dataloaders.py
CHANGED
|
@@ -1,20 +1,28 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
|
-
|
|
3
|
-
from typing import Any, cast
|
|
4
|
+
import warnings
|
|
5
|
+
from typing import TYPE_CHECKING, Any, cast
|
|
4
6
|
|
|
5
7
|
import torch
|
|
6
8
|
from datasets import Dataset, Image
|
|
7
9
|
from torch.utils.data import DataLoader, default_collate
|
|
8
10
|
|
|
9
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
10
11
|
from mteb.types import (
|
|
11
|
-
BatchedInput,
|
|
12
|
-
Conversation,
|
|
13
12
|
ConversationTurn,
|
|
14
13
|
PromptType,
|
|
15
|
-
QueryDatasetType,
|
|
16
14
|
)
|
|
17
|
-
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from collections.abc import Callable
|
|
18
|
+
|
|
19
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
20
|
+
from mteb.types import (
|
|
21
|
+
BatchedInput,
|
|
22
|
+
Conversation,
|
|
23
|
+
QueryDatasetType,
|
|
24
|
+
)
|
|
25
|
+
from mteb.types._encoder_io import CorpusInput, ImageInput, QueryInput, TextInput
|
|
18
26
|
|
|
19
27
|
logger = logging.getLogger(__name__)
|
|
20
28
|
|
|
@@ -22,22 +30,25 @@ logger = logging.getLogger(__name__)
|
|
|
22
30
|
def _create_dataloader_from_texts(
|
|
23
31
|
text: list[str],
|
|
24
32
|
batch_size: int = 32,
|
|
25
|
-
|
|
33
|
+
num_proc: int = 1,
|
|
34
|
+
**kwargs: Any,
|
|
26
35
|
) -> DataLoader[TextInput]:
|
|
27
36
|
"""Create a dataloader from a list of text.
|
|
28
37
|
|
|
29
38
|
Args:
|
|
30
39
|
text: A list of text to create a dataloader from.
|
|
31
40
|
batch_size: Batch size for the dataloader.
|
|
41
|
+
num_proc: Number of processes to use.
|
|
32
42
|
kwargs: Not used, present catching extra arguments.
|
|
33
43
|
|
|
34
44
|
Returns:
|
|
35
45
|
A dataloader with the text.
|
|
36
46
|
"""
|
|
37
47
|
dataset = Dataset.from_dict({"text": text})
|
|
38
|
-
return
|
|
48
|
+
return DataLoader(
|
|
39
49
|
dataset,
|
|
40
50
|
batch_size=batch_size,
|
|
51
|
+
num_workers=num_proc if num_proc > 1 else 0,
|
|
41
52
|
)
|
|
42
53
|
|
|
43
54
|
|
|
@@ -63,20 +74,27 @@ def _corpus_to_dict(
|
|
|
63
74
|
def _create_dataloader_for_retrieval_corpus(
|
|
64
75
|
dataset: Dataset,
|
|
65
76
|
batch_size: int = 32,
|
|
77
|
+
num_proc: int = 1,
|
|
66
78
|
) -> DataLoader[CorpusInput]:
|
|
67
79
|
"""Create a dataloader from a corpus.
|
|
68
80
|
|
|
69
81
|
Args:
|
|
70
82
|
dataset: Corpus
|
|
71
83
|
batch_size: Batch size for the dataloader.
|
|
84
|
+
num_proc: Number of processes to use.
|
|
72
85
|
|
|
73
86
|
Returns:
|
|
74
87
|
A dataloader with the corpus.
|
|
75
88
|
"""
|
|
76
|
-
new_ds = dataset.map(
|
|
77
|
-
|
|
89
|
+
new_ds = dataset.map(
|
|
90
|
+
_corpus_to_dict,
|
|
91
|
+
desc="Converting corpus dict",
|
|
92
|
+
num_proc=num_proc,
|
|
93
|
+
)
|
|
94
|
+
return DataLoader(
|
|
78
95
|
new_ds,
|
|
79
96
|
batch_size=batch_size,
|
|
97
|
+
num_workers=num_proc if num_proc > 1 else 0,
|
|
80
98
|
)
|
|
81
99
|
|
|
82
100
|
|
|
@@ -93,12 +111,14 @@ def _combine_queries_with_instruction_text(row: dict[str, str]) -> dict[str, str
|
|
|
93
111
|
def _create_text_dataloader_for_queries(
|
|
94
112
|
queries: QueryDatasetType,
|
|
95
113
|
batch_size: int = 32,
|
|
114
|
+
num_proc: int = 1,
|
|
96
115
|
) -> DataLoader[QueryInput]:
|
|
97
116
|
"""Create a dataloader from a list of queries.
|
|
98
117
|
|
|
99
118
|
Args:
|
|
100
119
|
queries: A list of queries.
|
|
101
120
|
batch_size: Batch size for the dataloader.
|
|
121
|
+
num_proc: Number of processes to use.
|
|
102
122
|
|
|
103
123
|
Returns:
|
|
104
124
|
A dataloader with the queries.
|
|
@@ -106,18 +126,17 @@ def _create_text_dataloader_for_queries(
|
|
|
106
126
|
queries = queries.map(
|
|
107
127
|
_combine_queries_with_instruction_text,
|
|
108
128
|
desc="Processing queries for dataloading",
|
|
129
|
+
num_proc=num_proc,
|
|
109
130
|
)
|
|
110
|
-
return
|
|
131
|
+
return DataLoader(
|
|
111
132
|
queries,
|
|
112
133
|
batch_size=batch_size,
|
|
134
|
+
num_workers=num_proc if num_proc > 1 else 0,
|
|
113
135
|
)
|
|
114
136
|
|
|
115
137
|
|
|
116
|
-
_warned_about_user_role = False
|
|
117
|
-
|
|
118
|
-
|
|
119
138
|
def _convert_conv_history_to_query(
|
|
120
|
-
row: dict[str, list[str] | Conversation],
|
|
139
|
+
row: dict[str, str | list[str] | Conversation],
|
|
121
140
|
) -> dict[str, str | Conversation]:
|
|
122
141
|
"""Convert a conversation history to a single query string.
|
|
123
142
|
|
|
@@ -127,21 +146,18 @@ def _convert_conv_history_to_query(
|
|
|
127
146
|
Returns:
|
|
128
147
|
The updated row with the "query" and "text" fields set to the conversation string, and the "conversation" field set to the list of ConversationTurn.
|
|
129
148
|
"""
|
|
130
|
-
global _warned_about_user_role
|
|
131
|
-
|
|
132
149
|
conversation = row["text"]
|
|
133
150
|
# if it's a list of strings, just join them
|
|
134
151
|
if isinstance(conversation, list) and isinstance(conversation[0], str):
|
|
135
|
-
|
|
136
|
-
conv_str = "; ".join(
|
|
152
|
+
conversation_ = cast("list[str]", conversation)
|
|
153
|
+
conv_str = "; ".join(conversation_)
|
|
137
154
|
current_conversation = [
|
|
138
|
-
ConversationTurn(role="user", content=message) for message in
|
|
155
|
+
ConversationTurn(role="user", content=message) for message in conversation_
|
|
139
156
|
]
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
_warned_about_user_role = True
|
|
157
|
+
warnings.warn(
|
|
158
|
+
"Conversations are a list of strings. Used 'user' role for all turns.",
|
|
159
|
+
category=UserWarning,
|
|
160
|
+
)
|
|
145
161
|
# otherwise, it's a list of dictionaries, which we need to convert to strings
|
|
146
162
|
elif isinstance(conversation, list) and isinstance(conversation[0], dict):
|
|
147
163
|
conv = []
|
|
@@ -178,28 +194,33 @@ def _convert_conv_history_to_query(
|
|
|
178
194
|
|
|
179
195
|
row["text"] = conv_str
|
|
180
196
|
row["conversation"] = current_conversation
|
|
181
|
-
return row
|
|
197
|
+
return cast("dict[str, str | list[ConversationTurn]]", row)
|
|
182
198
|
|
|
183
199
|
|
|
184
200
|
def _create_dataloader_for_queries_conversation(
|
|
185
201
|
queries: QueryDatasetType,
|
|
186
202
|
batch_size: int = 32,
|
|
203
|
+
num_proc: int = 1,
|
|
187
204
|
) -> DataLoader[QueryInput]:
|
|
188
205
|
"""Create a dataloader from a list of queries.
|
|
189
206
|
|
|
190
207
|
Args:
|
|
191
208
|
queries: A list of queries.
|
|
192
209
|
batch_size: Batch size for the dataloader.
|
|
210
|
+
num_proc: Number of processes to use.
|
|
193
211
|
|
|
194
212
|
Returns:
|
|
195
213
|
A dataloader with the queries.
|
|
196
214
|
"""
|
|
197
215
|
return DataLoader(
|
|
198
216
|
queries.map(
|
|
199
|
-
_convert_conv_history_to_query,
|
|
217
|
+
_convert_conv_history_to_query,
|
|
218
|
+
desc="Converting conversations to queries",
|
|
219
|
+
num_proc=num_proc,
|
|
200
220
|
),
|
|
201
221
|
collate_fn=_custom_collate_fn,
|
|
202
222
|
batch_size=batch_size,
|
|
223
|
+
num_workers=num_proc if num_proc > 1 else 0,
|
|
203
224
|
)
|
|
204
225
|
|
|
205
226
|
|
|
@@ -244,6 +265,7 @@ def _prepare_image_dataset(
|
|
|
244
265
|
dataset: Dataset,
|
|
245
266
|
image_column_name: str | None = None,
|
|
246
267
|
transform: Callable[[Any], Any] | None = None,
|
|
268
|
+
num_proc: int = 1,
|
|
247
269
|
) -> Dataset:
|
|
248
270
|
"""Prepare the image dataset by converting images to RGB and applying transformations."""
|
|
249
271
|
if (
|
|
@@ -259,6 +281,7 @@ def _prepare_image_dataset(
|
|
|
259
281
|
_convert_images_to_rgb,
|
|
260
282
|
fn_kwargs={"image_col_name": "image", "transform": transform},
|
|
261
283
|
desc="Converting images to RGB",
|
|
284
|
+
num_proc=num_proc,
|
|
262
285
|
)
|
|
263
286
|
|
|
264
287
|
|
|
@@ -292,6 +315,7 @@ def _create_image_dataloader(
|
|
|
292
315
|
batch_size: int = 32,
|
|
293
316
|
transform: Callable[[Any], Any] | None = None,
|
|
294
317
|
collate_fn: Callable[[list[dict[str, Any]]], dict[str, Any]] = _custom_collate_fn,
|
|
318
|
+
num_proc: int = 1,
|
|
295
319
|
) -> DataLoader[ImageInput]:
|
|
296
320
|
"""Creates a DataLoader with the image dataset prepared using the explicit transformation.
|
|
297
321
|
|
|
@@ -301,33 +325,41 @@ def _create_image_dataloader(
|
|
|
301
325
|
batch_size: Batch size for the dataloader.
|
|
302
326
|
transform: A transformation function to apply to each image (e.g., converting to tensor).
|
|
303
327
|
collate_fn: A custom collate function to handle batching.
|
|
328
|
+
num_proc: Number of processes to use.
|
|
304
329
|
|
|
305
330
|
Returns:
|
|
306
331
|
A DataLoader with the image dataset.
|
|
307
332
|
"""
|
|
308
333
|
dataset = _prepare_image_dataset(
|
|
309
|
-
dataset,
|
|
334
|
+
dataset,
|
|
335
|
+
image_column_name,
|
|
336
|
+
transform,
|
|
337
|
+
num_proc=num_proc,
|
|
310
338
|
).select_columns(["image"])
|
|
311
339
|
return DataLoader(
|
|
312
340
|
dataset,
|
|
313
341
|
batch_size=batch_size,
|
|
314
342
|
collate_fn=collate_fn,
|
|
315
343
|
shuffle=False,
|
|
344
|
+
num_workers=num_proc if num_proc > 1 else 0,
|
|
316
345
|
)
|
|
317
346
|
|
|
318
347
|
|
|
319
348
|
def _create_text_queries_dataloader(
|
|
320
349
|
dataset: Dataset,
|
|
321
350
|
batch_size: int = 32,
|
|
351
|
+
num_proc: int = 1,
|
|
322
352
|
) -> DataLoader[QueryInput]:
|
|
323
353
|
if not isinstance(dataset["text"][0], list):
|
|
324
354
|
return _create_text_dataloader_for_queries(
|
|
325
355
|
dataset,
|
|
326
356
|
batch_size=batch_size,
|
|
357
|
+
num_proc=num_proc,
|
|
327
358
|
)
|
|
328
359
|
return _create_dataloader_for_queries_conversation(
|
|
329
360
|
dataset,
|
|
330
361
|
batch_size=batch_size,
|
|
362
|
+
num_proc=num_proc,
|
|
331
363
|
)
|
|
332
364
|
|
|
333
365
|
|
|
@@ -336,6 +368,7 @@ def _create_queries_dataloader(
|
|
|
336
368
|
task_metadata: TaskMetadata,
|
|
337
369
|
input_column: str | None = None,
|
|
338
370
|
batch_size: int = 32,
|
|
371
|
+
num_proc: int = 1,
|
|
339
372
|
) -> DataLoader[QueryInput | ImageInput]:
|
|
340
373
|
"""Create a dataloader for queries."""
|
|
341
374
|
queries_type = task_metadata.get_modalities(PromptType.query)
|
|
@@ -343,12 +376,14 @@ def _create_queries_dataloader(
|
|
|
343
376
|
return _create_text_queries_dataloader(
|
|
344
377
|
dataset,
|
|
345
378
|
batch_size=batch_size,
|
|
379
|
+
num_proc=num_proc,
|
|
346
380
|
)
|
|
347
381
|
if "image" in queries_type: # contains image
|
|
348
382
|
return _create_image_dataloader(
|
|
349
383
|
dataset,
|
|
350
384
|
image_column_name="image",
|
|
351
385
|
batch_size=batch_size,
|
|
386
|
+
num_proc=num_proc,
|
|
352
387
|
)
|
|
353
388
|
raise ValueError(f"Can't handle queries type {queries_type}")
|
|
354
389
|
|
|
@@ -358,6 +393,7 @@ def _create_document_dataloader(
|
|
|
358
393
|
task_metadata: TaskMetadata,
|
|
359
394
|
input_column: str | None = None,
|
|
360
395
|
batch_size: int = 32,
|
|
396
|
+
num_proc: int = 1,
|
|
361
397
|
) -> DataLoader[CorpusInput | ImageInput]:
|
|
362
398
|
"""Create a dataloader for documents.
|
|
363
399
|
|
|
@@ -366,18 +402,24 @@ def _create_document_dataloader(
|
|
|
366
402
|
task_metadata: Metadata of the task to determine the document type.
|
|
367
403
|
input_column: The column to use as input. If None, it will use the first column that matches the modality.
|
|
368
404
|
batch_size: Batch size for the dataloader.
|
|
405
|
+
num_proc: Number of processes to use.
|
|
406
|
+
|
|
407
|
+
Returns:
|
|
408
|
+
A dataloader for the documents.
|
|
369
409
|
"""
|
|
370
410
|
document_type = task_metadata.get_modalities(PromptType.document)
|
|
371
411
|
if document_type == ["text"]: # text only
|
|
372
412
|
return _create_dataloader_for_retrieval_corpus(
|
|
373
413
|
dataset,
|
|
374
414
|
batch_size=batch_size,
|
|
415
|
+
num_proc=num_proc,
|
|
375
416
|
)
|
|
376
417
|
if "image" in document_type: # contains image
|
|
377
418
|
return _create_image_dataloader(
|
|
378
419
|
dataset,
|
|
379
420
|
image_column_name="image",
|
|
380
421
|
batch_size=batch_size,
|
|
422
|
+
num_proc=num_proc,
|
|
381
423
|
)
|
|
382
424
|
raise ValueError(f"Can't handle queries type {document_type}")
|
|
383
425
|
|
|
@@ -388,7 +430,8 @@ def create_dataloader(
|
|
|
388
430
|
prompt_type: PromptType | None = None,
|
|
389
431
|
input_column: str | None = None,
|
|
390
432
|
batch_size: int = 32,
|
|
391
|
-
|
|
433
|
+
num_proc: int = 1,
|
|
434
|
+
**kwargs: Any,
|
|
392
435
|
) -> DataLoader[BatchedInput]:
|
|
393
436
|
"""Create a dataloader from a dataset.
|
|
394
437
|
|
|
@@ -401,6 +444,7 @@ def create_dataloader(
|
|
|
401
444
|
prompt_type: The type of prompt to create a dataloader for. If None, it will be inferred from the task metadata.
|
|
402
445
|
input_column: The column to use as input. If None, it will use the first column that matches the modality.
|
|
403
446
|
batch_size: The batch size for the dataloader.
|
|
447
|
+
num_proc: The number of processes to use for dataset processing.
|
|
404
448
|
**kwargs: Additional arguments to pass to the dataloader creation functions.
|
|
405
449
|
|
|
406
450
|
Returns:
|
|
@@ -412,6 +456,7 @@ def create_dataloader(
|
|
|
412
456
|
task_metadata,
|
|
413
457
|
batch_size=batch_size,
|
|
414
458
|
input_column=input_column,
|
|
459
|
+
num_proc=num_proc,
|
|
415
460
|
)
|
|
416
461
|
if prompt_type == PromptType.document:
|
|
417
462
|
return _create_document_dataloader(
|
|
@@ -419,6 +464,7 @@ def create_dataloader(
|
|
|
419
464
|
task_metadata,
|
|
420
465
|
input_column=input_column,
|
|
421
466
|
batch_size=batch_size,
|
|
467
|
+
num_proc=num_proc,
|
|
422
468
|
)
|
|
423
469
|
|
|
424
470
|
if "image" in task_metadata.modalities:
|
|
@@ -426,6 +472,7 @@ def create_dataloader(
|
|
|
426
472
|
dataset,
|
|
427
473
|
image_column_name=input_column,
|
|
428
474
|
batch_size=batch_size,
|
|
475
|
+
num_proc=num_proc,
|
|
429
476
|
)
|
|
430
477
|
if "text" in task_metadata.modalities and input_column is not None:
|
|
431
478
|
return _create_dataloader_from_texts(
|
|
@@ -435,4 +482,5 @@ def create_dataloader(
|
|
|
435
482
|
return DataLoader(
|
|
436
483
|
dataset,
|
|
437
484
|
batch_size=batch_size,
|
|
485
|
+
num_workers=num_proc if num_proc > 1 else 0,
|
|
438
486
|
)
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
|
-
from typing import
|
|
4
|
+
from typing import TYPE_CHECKING, TypedDict
|
|
3
5
|
|
|
4
|
-
from datasets import Dataset
|
|
5
6
|
from sklearn.metrics.pairwise import (
|
|
6
7
|
paired_cosine_distances,
|
|
7
8
|
paired_euclidean_distances,
|
|
@@ -9,13 +10,17 @@ from sklearn.metrics.pairwise import (
|
|
|
9
10
|
)
|
|
10
11
|
|
|
11
12
|
from mteb._create_dataloaders import create_dataloader
|
|
12
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
13
|
-
from mteb.models import EncoderProtocol
|
|
14
13
|
from mteb.similarity_functions import compute_pairwise_similarity
|
|
15
|
-
from mteb.types import PromptType
|
|
16
14
|
|
|
17
15
|
from .evaluator import Evaluator
|
|
18
16
|
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from datasets import Dataset
|
|
19
|
+
|
|
20
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
21
|
+
from mteb.models import EncoderProtocol
|
|
22
|
+
from mteb.types import EncodeKwargs, PromptType
|
|
23
|
+
|
|
19
24
|
logger = logging.getLogger(__name__)
|
|
20
25
|
|
|
21
26
|
|
|
@@ -60,7 +65,8 @@ class AnySTSEvaluator(Evaluator):
|
|
|
60
65
|
self,
|
|
61
66
|
model: EncoderProtocol,
|
|
62
67
|
*,
|
|
63
|
-
encode_kwargs:
|
|
68
|
+
encode_kwargs: EncodeKwargs,
|
|
69
|
+
num_proc: int = 1,
|
|
64
70
|
) -> STSEvaluatorScores:
|
|
65
71
|
logger.info("Running semantic similarity - Encoding samples (1/2)")
|
|
66
72
|
embeddings1 = model.encode(
|
|
@@ -68,6 +74,7 @@ class AnySTSEvaluator(Evaluator):
|
|
|
68
74
|
self.dataset,
|
|
69
75
|
self.task_metadata,
|
|
70
76
|
input_column=self.input_columns[0],
|
|
77
|
+
num_proc=num_proc,
|
|
71
78
|
**encode_kwargs,
|
|
72
79
|
),
|
|
73
80
|
task_metadata=self.task_metadata,
|
|
@@ -1,15 +1,21 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
|
-
from typing import
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
3
5
|
|
|
4
|
-
from datasets import Dataset
|
|
5
6
|
from sklearn import cluster
|
|
6
7
|
|
|
7
8
|
from mteb._create_dataloaders import create_dataloader
|
|
8
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
9
|
-
from mteb.models import EncoderProtocol
|
|
10
9
|
|
|
11
10
|
from .evaluator import Evaluator
|
|
12
11
|
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from datasets import Dataset
|
|
14
|
+
|
|
15
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
16
|
+
from mteb.models import EncoderProtocol
|
|
17
|
+
from mteb.types import EncodeKwargs
|
|
18
|
+
|
|
13
19
|
logger = logging.getLogger(__name__)
|
|
14
20
|
|
|
15
21
|
|
|
@@ -38,12 +44,14 @@ class ClusteringEvaluator(Evaluator):
|
|
|
38
44
|
self,
|
|
39
45
|
model: EncoderProtocol,
|
|
40
46
|
*,
|
|
41
|
-
encode_kwargs:
|
|
47
|
+
encode_kwargs: EncodeKwargs,
|
|
48
|
+
num_proc: int = 1,
|
|
42
49
|
) -> list[int]:
|
|
43
50
|
data_loader = create_dataloader(
|
|
44
51
|
self.dataset,
|
|
45
52
|
self.task_metadata,
|
|
46
53
|
input_column=self.input_column_name,
|
|
54
|
+
num_proc=num_proc,
|
|
47
55
|
**encode_kwargs,
|
|
48
56
|
)
|
|
49
57
|
|
mteb/_evaluators/evaluator.py
CHANGED
|
@@ -1,8 +1,15 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
from abc import ABC, abstractmethod
|
|
2
|
-
from typing import Any
|
|
4
|
+
from typing import TYPE_CHECKING, Any
|
|
3
5
|
|
|
4
6
|
from mteb.abstasks.abstask import _set_seed
|
|
5
|
-
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from collections.abc import Iterable, Mapping
|
|
10
|
+
|
|
11
|
+
from mteb.models import EncoderProtocol
|
|
12
|
+
from mteb.types import EncodeKwargs
|
|
6
13
|
|
|
7
14
|
|
|
8
15
|
class Evaluator(ABC):
|
|
@@ -17,8 +24,8 @@ class Evaluator(ABC):
|
|
|
17
24
|
|
|
18
25
|
@abstractmethod
|
|
19
26
|
def __call__(
|
|
20
|
-
self, model: EncoderProtocol, *, encode_kwargs:
|
|
21
|
-
) ->
|
|
27
|
+
self, model: EncoderProtocol, *, encode_kwargs: EncodeKwargs, num_proc: int = 1
|
|
28
|
+
) -> Mapping[str, float] | Iterable[Any]:
|
|
22
29
|
"""This is called during training to evaluate the model.
|
|
23
30
|
|
|
24
31
|
It returns scores.
|
|
@@ -26,5 +33,6 @@ class Evaluator(ABC):
|
|
|
26
33
|
Args:
|
|
27
34
|
model: the model to evaluate
|
|
28
35
|
encode_kwargs: kwargs to pass to the model's encode method
|
|
36
|
+
num_proc: number of processes to use for data loading
|
|
29
37
|
"""
|
|
30
38
|
pass
|
|
@@ -5,20 +5,24 @@ from typing import TYPE_CHECKING, Any
|
|
|
5
5
|
|
|
6
6
|
import torch
|
|
7
7
|
import torch.nn.functional as F
|
|
8
|
-
from datasets import Dataset
|
|
9
8
|
from torch.utils.data import DataLoader
|
|
10
9
|
|
|
11
10
|
from mteb._create_dataloaders import (
|
|
11
|
+
_create_dataloader_from_texts,
|
|
12
12
|
_transform_image_to_rgb,
|
|
13
13
|
)
|
|
14
14
|
from mteb._evaluators.evaluator import Evaluator
|
|
15
15
|
from mteb._requires_package import requires_image_dependencies
|
|
16
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
17
|
-
from mteb.models.models_protocols import EncoderProtocol
|
|
18
16
|
|
|
19
17
|
if TYPE_CHECKING:
|
|
18
|
+
from collections.abc import Sequence
|
|
19
|
+
|
|
20
20
|
from PIL.Image import Image
|
|
21
21
|
|
|
22
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
23
|
+
from mteb.models.models_protocols import EncoderProtocol
|
|
24
|
+
from mteb.types import EncodeKwargs
|
|
25
|
+
|
|
22
26
|
|
|
23
27
|
logger = logging.getLogger(__name__)
|
|
24
28
|
|
|
@@ -61,8 +65,8 @@ class ImageTextPairClassificationEvaluator(Evaluator):
|
|
|
61
65
|
def __init__(
|
|
62
66
|
self,
|
|
63
67
|
dataset,
|
|
64
|
-
images_column_names: str |
|
|
65
|
-
texts_column_names: str |
|
|
68
|
+
images_column_names: str | Sequence[str],
|
|
69
|
+
texts_column_names: str | Sequence[str],
|
|
66
70
|
num_images_per_sample: int,
|
|
67
71
|
num_texts_per_sample: int,
|
|
68
72
|
task_metadata: TaskMetadata,
|
|
@@ -82,10 +86,12 @@ class ImageTextPairClassificationEvaluator(Evaluator):
|
|
|
82
86
|
self.hf_split = hf_split
|
|
83
87
|
self.hf_subset = hf_subset
|
|
84
88
|
|
|
85
|
-
def __call__(
|
|
89
|
+
def __call__( # type: ignore[override]
|
|
86
90
|
self,
|
|
87
91
|
model: EncoderProtocol,
|
|
88
|
-
|
|
92
|
+
*,
|
|
93
|
+
encode_kwargs: EncodeKwargs,
|
|
94
|
+
num_proc: int = 1,
|
|
89
95
|
) -> list[torch.Tensor]:
|
|
90
96
|
images = []
|
|
91
97
|
if isinstance(self.images_column_names, str):
|
|
@@ -106,8 +112,9 @@ class ImageTextPairClassificationEvaluator(Evaluator):
|
|
|
106
112
|
texts.append(row[col])
|
|
107
113
|
|
|
108
114
|
text_embeddings = model.encode(
|
|
109
|
-
|
|
110
|
-
|
|
115
|
+
_create_dataloader_from_texts(
|
|
116
|
+
texts,
|
|
117
|
+
num_proc=num_proc,
|
|
111
118
|
**encode_kwargs,
|
|
112
119
|
),
|
|
113
120
|
task_metadata=self.task_metadata,
|
|
@@ -124,11 +131,15 @@ class ImageTextPairClassificationEvaluator(Evaluator):
|
|
|
124
131
|
dim=-1,
|
|
125
132
|
).view(len(self.dataset), self.num_texts_per_sample, -1)
|
|
126
133
|
|
|
134
|
+
def _image_collate_fn(batch):
|
|
135
|
+
"""Collate function for image batches."""
|
|
136
|
+
return {"image": [item["image"] for item in batch]}
|
|
137
|
+
|
|
127
138
|
image_embeddings = model.encode(
|
|
128
139
|
DataLoader(
|
|
129
140
|
CustomImageDataset(images),
|
|
130
|
-
collate_fn=
|
|
131
|
-
|
|
141
|
+
collate_fn=_image_collate_fn,
|
|
142
|
+
num_workers=num_proc if num_proc > 1 else 0,
|
|
132
143
|
),
|
|
133
144
|
task_metadata=self.task_metadata,
|
|
134
145
|
hf_subset=self.hf_subset,
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
|
-
from typing import Any, TypedDict
|
|
4
|
+
from typing import TYPE_CHECKING, Any, TypedDict
|
|
3
5
|
|
|
4
6
|
import numpy as np
|
|
5
|
-
from datasets import Dataset
|
|
6
7
|
from sklearn.metrics.pairwise import (
|
|
7
8
|
paired_cosine_distances,
|
|
8
9
|
paired_euclidean_distances,
|
|
@@ -11,10 +12,14 @@ from sklearn.metrics.pairwise import (
|
|
|
11
12
|
|
|
12
13
|
from mteb._create_dataloaders import _create_dataloader_from_texts, create_dataloader
|
|
13
14
|
from mteb._evaluators.evaluator import Evaluator
|
|
14
|
-
from mteb.abstasks.task_metadata import TaskMetadata
|
|
15
|
-
from mteb.models import EncoderProtocol
|
|
16
15
|
from mteb.similarity_functions import compute_pairwise_similarity
|
|
17
|
-
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from datasets import Dataset
|
|
19
|
+
|
|
20
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
21
|
+
from mteb.models import EncoderProtocol
|
|
22
|
+
from mteb.types import EncodeKwargs, PromptType
|
|
18
23
|
|
|
19
24
|
logger = logging.getLogger(__name__)
|
|
20
25
|
|
|
@@ -85,7 +90,8 @@ class PairClassificationEvaluator(Evaluator):
|
|
|
85
90
|
def __call__(
|
|
86
91
|
self,
|
|
87
92
|
model: EncoderProtocol,
|
|
88
|
-
encode_kwargs:
|
|
93
|
+
encode_kwargs: EncodeKwargs,
|
|
94
|
+
num_proc: int = 1,
|
|
89
95
|
) -> PairClassificationDistances:
|
|
90
96
|
logger.info("Running pair classification - Encoding samples (1/2)")
|
|
91
97
|
embeddings1 = model.encode(
|
|
@@ -93,6 +99,7 @@ class PairClassificationEvaluator(Evaluator):
|
|
|
93
99
|
self.dataset,
|
|
94
100
|
task_metadata=self.task_metadata,
|
|
95
101
|
input_column=self.input1_column_name,
|
|
102
|
+
num_proc=num_proc,
|
|
96
103
|
**encode_kwargs,
|
|
97
104
|
),
|
|
98
105
|
task_metadata=self.task_metadata,
|
|
@@ -107,6 +114,7 @@ class PairClassificationEvaluator(Evaluator):
|
|
|
107
114
|
self.dataset,
|
|
108
115
|
task_metadata=self.task_metadata,
|
|
109
116
|
input_column=self.input2_column_name,
|
|
117
|
+
num_proc=num_proc,
|
|
110
118
|
**encode_kwargs,
|
|
111
119
|
),
|
|
112
120
|
task_metadata=self.task_metadata,
|
|
@@ -148,7 +156,9 @@ class PairClassificationEvaluator(Evaluator):
|
|
|
148
156
|
hf_subset: str,
|
|
149
157
|
**encode_kwargs: Any,
|
|
150
158
|
) -> np.ndarray:
|
|
151
|
-
index_map
|
|
159
|
+
index_map = {}
|
|
160
|
+
all_unique_texts: list[str] = []
|
|
161
|
+
all_texts_indexes = []
|
|
152
162
|
for text in all_texts:
|
|
153
163
|
text_hash = hash(text)
|
|
154
164
|
if text_hash not in index_map:
|