mteb 2.1.4__py3-none-any.whl → 2.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/__init__.py +6 -0
- mteb/_create_dataloaders.py +22 -20
- mteb/_evaluators/any_sts_evaluator.py +23 -14
- mteb/_evaluators/classification_metrics.py +54 -0
- mteb/_evaluators/clustering_evaluator.py +3 -3
- mteb/_evaluators/evaluator.py +4 -2
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +18 -11
- mteb/_evaluators/pair_classification_evaluator.py +34 -40
- mteb/_evaluators/retrieval_evaluator.py +2 -2
- mteb/_evaluators/retrieval_metrics.py +18 -17
- mteb/_evaluators/sklearn_evaluator.py +25 -37
- mteb/_evaluators/text/bitext_mining_evaluator.py +31 -19
- mteb/_evaluators/text/summarization_evaluator.py +27 -20
- mteb/_evaluators/zeroshot_classification_evaluator.py +7 -5
- mteb/abstasks/_data_filter/__init__.py +0 -0
- mteb/abstasks/_data_filter/filters.py +125 -0
- mteb/abstasks/_data_filter/task_pipelines.py +105 -0
- mteb/abstasks/_statistics_calculation.py +23 -11
- mteb/abstasks/_stratification.py +18 -18
- mteb/abstasks/abstask.py +35 -28
- mteb/abstasks/aggregate_task_metadata.py +1 -9
- mteb/abstasks/aggregated_task.py +10 -29
- mteb/abstasks/classification.py +15 -12
- mteb/abstasks/clustering.py +20 -16
- mteb/abstasks/clustering_legacy.py +13 -10
- mteb/abstasks/image/image_text_pair_classification.py +7 -4
- mteb/abstasks/multilabel_classification.py +33 -22
- mteb/abstasks/pair_classification.py +27 -11
- mteb/abstasks/regression.py +4 -4
- mteb/abstasks/retrieval.py +28 -24
- mteb/abstasks/retrieval_dataset_loaders.py +2 -2
- mteb/abstasks/sts.py +14 -4
- mteb/abstasks/task_metadata.py +32 -33
- mteb/abstasks/text/bitext_mining.py +39 -28
- mteb/abstasks/text/reranking.py +8 -6
- mteb/abstasks/text/summarization.py +10 -5
- mteb/abstasks/zeroshot_classification.py +8 -4
- mteb/benchmarks/_create_table.py +84 -37
- mteb/benchmarks/benchmark.py +77 -16
- mteb/benchmarks/benchmarks/__init__.py +12 -0
- mteb/benchmarks/benchmarks/benchmarks.py +361 -16
- mteb/benchmarks/get_benchmark.py +14 -53
- mteb/cache.py +227 -37
- mteb/cli/_display_tasks.py +2 -2
- mteb/cli/build_cli.py +110 -14
- mteb/cli/generate_model_card.py +43 -23
- mteb/deprecated_evaluator.py +71 -62
- mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
- mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
- mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
- mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
- mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
- mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
- mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
- mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
- mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
- mteb/evaluate.py +106 -75
- mteb/filter_tasks.py +25 -26
- mteb/get_tasks.py +29 -30
- mteb/languages/language_scripts.py +5 -3
- mteb/leaderboard/app.py +414 -151
- mteb/leaderboard/benchmark_selector.py +14 -5
- mteb/leaderboard/figures.py +13 -15
- mteb/leaderboard/table.py +82 -17
- mteb/load_results.py +12 -12
- mteb/models/__init__.py +4 -1
- mteb/models/abs_encoder.py +31 -23
- mteb/models/cache_wrappers/__init__.py +2 -1
- mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +7 -6
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
- mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
- mteb/models/cache_wrappers/cache_wrapper.py +3 -3
- mteb/models/get_model_meta.py +25 -118
- mteb/models/instruct_wrapper.py +33 -9
- mteb/models/model_implementations/align_models.py +8 -1
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +65 -0
- mteb/models/model_implementations/ara_models.py +9 -1
- mteb/models/model_implementations/arctic_models.py +16 -8
- mteb/models/model_implementations/b1ade_models.py +2 -1
- mteb/models/model_implementations/bedrock_models.py +4 -0
- mteb/models/model_implementations/bge_models.py +101 -17
- mteb/models/model_implementations/bica_model.py +35 -0
- mteb/models/model_implementations/blip2_models.py +13 -2
- mteb/models/model_implementations/blip_models.py +43 -16
- mteb/models/model_implementations/bm25.py +5 -4
- mteb/models/model_implementations/bmretriever_models.py +10 -4
- mteb/models/model_implementations/cadet_models.py +10 -1
- mteb/models/model_implementations/cde_models.py +25 -4
- mteb/models/model_implementations/clip_models.py +9 -6
- mteb/models/model_implementations/clips_models.py +100 -0
- mteb/models/model_implementations/codefuse_models.py +165 -3
- mteb/models/model_implementations/codesage_models.py +18 -3
- mteb/models/model_implementations/cohere_models.py +13 -6
- mteb/models/model_implementations/cohere_v.py +7 -2
- mteb/models/model_implementations/colpali_models.py +17 -9
- mteb/models/model_implementations/colqwen_models.py +275 -5
- mteb/models/model_implementations/colsmol_models.py +4 -2
- mteb/models/model_implementations/conan_models.py +2 -1
- mteb/models/model_implementations/dino_models.py +194 -23
- mteb/models/model_implementations/e5_instruct.py +27 -4
- mteb/models/model_implementations/e5_models.py +21 -110
- mteb/models/model_implementations/e5_v.py +7 -6
- mteb/models/model_implementations/eagerworks_models.py +164 -0
- mteb/models/model_implementations/emillykkejensen_models.py +91 -0
- mteb/models/model_implementations/en_code_retriever.py +2 -1
- mteb/models/model_implementations/euler_models.py +32 -0
- mteb/models/model_implementations/evaclip_models.py +4 -0
- mteb/models/model_implementations/fa_models.py +67 -9
- mteb/models/model_implementations/facebookai.py +205 -0
- mteb/models/model_implementations/geogpt_models.py +2 -1
- mteb/models/model_implementations/gme_v_models.py +17 -10
- mteb/models/model_implementations/google_models.py +17 -6
- mteb/models/model_implementations/granite_vision_embedding_models.py +8 -3
- mteb/models/model_implementations/gritlm_models.py +4 -2
- mteb/models/model_implementations/gte_models.py +99 -9
- mteb/models/model_implementations/hinvec_models.py +2 -1
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +36 -6
- mteb/models/model_implementations/inf_models.py +4 -2
- mteb/models/model_implementations/jasper_models.py +256 -3
- mteb/models/model_implementations/jina_clip.py +49 -10
- mteb/models/model_implementations/jina_models.py +222 -11
- mteb/models/model_implementations/kalm_models.py +203 -25
- mteb/models/model_implementations/kblab.py +37 -0
- mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
- mteb/models/model_implementations/kfst.py +25 -0
- mteb/models/model_implementations/kowshik24_models.py +32 -0
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +2 -1
- mteb/models/model_implementations/linq_models.py +4 -3
- mteb/models/model_implementations/listconranker.py +2 -2
- mteb/models/model_implementations/llm2clip_models.py +9 -6
- mteb/models/model_implementations/llm2vec_models.py +16 -8
- mteb/models/model_implementations/mcinext_models.py +7 -1
- mteb/models/model_implementations/mdbr_models.py +19 -3
- mteb/models/model_implementations/misc_models.py +422 -60
- mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
- mteb/models/model_implementations/mme5_models.py +2 -1
- mteb/models/model_implementations/moco_models.py +15 -4
- mteb/models/model_implementations/mod_models.py +191 -0
- mteb/models/model_implementations/model2vec_models.py +27 -14
- mteb/models/model_implementations/moka_models.py +4 -1
- mteb/models/model_implementations/nbailab.py +70 -0
- mteb/models/model_implementations/no_instruct_sentence_models.py +3 -2
- mteb/models/model_implementations/nomic_models.py +173 -6
- mteb/models/model_implementations/nomic_models_vision.py +8 -3
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +32 -19
- mteb/models/model_implementations/nvidia_models.py +155 -20
- mteb/models/model_implementations/octen_models.py +254 -0
- mteb/models/model_implementations/openai_models.py +20 -16
- mteb/models/model_implementations/openclip_models.py +37 -13
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -5
- mteb/models/model_implementations/ops_moa_models.py +5 -3
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
- mteb/models/model_implementations/pawan_models.py +39 -0
- mteb/models/model_implementations/piccolo_models.py +9 -1
- mteb/models/model_implementations/pixie_models.py +56 -0
- mteb/models/model_implementations/promptriever_models.py +12 -8
- mteb/models/model_implementations/pylate_models.py +46 -12
- mteb/models/model_implementations/qodo_models.py +4 -2
- mteb/models/model_implementations/qtack_models.py +2 -1
- mteb/models/model_implementations/qwen3_models.py +9 -6
- mteb/models/model_implementations/qzhou_models.py +5 -3
- mteb/models/model_implementations/random_baseline.py +19 -24
- mteb/models/model_implementations/rasgaard_models.py +34 -0
- mteb/models/model_implementations/reasonir_model.py +2 -1
- mteb/models/model_implementations/repllama_models.py +5 -3
- mteb/models/model_implementations/rerankers_custom.py +15 -9
- mteb/models/model_implementations/rerankers_monot5_based.py +31 -31
- mteb/models/model_implementations/richinfoai_models.py +2 -1
- mteb/models/model_implementations/ru_sentence_models.py +71 -20
- mteb/models/model_implementations/ruri_models.py +322 -0
- mteb/models/model_implementations/salesforce_models.py +6 -3
- mteb/models/model_implementations/samilpwc_models.py +2 -1
- mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
- mteb/models/model_implementations/searchmap_models.py +2 -1
- mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +625 -0
- mteb/models/model_implementations/seed_models.py +1 -0
- mteb/models/model_implementations/sentence_transformers_models.py +177 -18
- mteb/models/model_implementations/shuu_model.py +32 -31
- mteb/models/model_implementations/siglip_models.py +30 -20
- mteb/models/model_implementations/slm_models.py +416 -0
- mteb/models/model_implementations/sonar_models.py +1 -0
- mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
- mteb/models/model_implementations/stella_models.py +23 -4
- mteb/models/model_implementations/tarka_models.py +376 -0
- mteb/models/model_implementations/text2vec_models.py +9 -3
- mteb/models/model_implementations/ua_sentence_models.py +11 -1
- mteb/models/model_implementations/uae_models.py +8 -1
- mteb/models/model_implementations/vdr_models.py +3 -1
- mteb/models/model_implementations/vi_vn_models.py +45 -6
- mteb/models/model_implementations/vista_models.py +2 -0
- mteb/models/model_implementations/vlm2vec_models.py +5 -3
- mteb/models/model_implementations/voyage_models.py +99 -0
- mteb/models/model_implementations/voyage_v.py +17 -9
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +2 -1
- mteb/models/model_implementations/yuan_models.py +34 -0
- mteb/models/model_implementations/yuan_models_en.py +58 -0
- mteb/models/model_meta.py +498 -29
- mteb/models/models_protocols.py +22 -6
- mteb/models/search_encoder_index/__init__.py +7 -0
- mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
- mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +160 -0
- mteb/models/search_wrappers.py +197 -65
- mteb/models/sentence_transformer_wrapper.py +52 -32
- mteb/models/vllm_wrapper.py +327 -0
- mteb/py.typed +0 -0
- mteb/results/benchmark_results.py +114 -65
- mteb/results/model_result.py +63 -26
- mteb/results/task_result.py +117 -77
- mteb/similarity_functions.py +60 -7
- mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
- mteb/tasks/classification/ara/ajgt.py +1 -2
- mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
- mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
- mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
- mteb/tasks/classification/dan/dk_hate_classification.py +2 -3
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
- mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
- mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
- mteb/tasks/classification/eng/arxiv_classification.py +1 -2
- mteb/tasks/classification/eng/banking77_classification.py +1 -2
- mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
- mteb/tasks/classification/eng/emotion_classification.py +1 -2
- mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
- mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
- mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
- mteb/tasks/classification/eng/imdb_classification.py +1 -2
- mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
- mteb/tasks/classification/eng/news_classification.py +1 -2
- mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
- mteb/tasks/classification/eng/patent_classification.py +1 -2
- mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
- mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
- mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
- mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
- mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
- mteb/tasks/classification/eng/ucf101_classification.py +1 -5
- mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
- mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
- mteb/tasks/classification/est/estonian_valence.py +2 -3
- mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
- mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
- mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
- mteb/tasks/classification/fra/french_book_reviews.py +1 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
- mteb/tasks/classification/heb/__init__.py +6 -1
- mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
- mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
- mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
- mteb/tasks/classification/jpn/wrime_classification.py +1 -2
- mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
- mteb/tasks/classification/kor/klue_tc.py +1 -2
- mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +3 -4
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
- mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
- mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
- mteb/tasks/classification/multilingual/scala_classification.py +2 -3
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
- mteb/tasks/classification/mya/myanmar_news.py +1 -2
- mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
- mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
- mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
- mteb/tasks/classification/nld/iconclass_classification.py +3 -0
- mteb/tasks/classification/nld/open_tender_classification.py +3 -0
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
- mteb/tasks/classification/nob/no_rec_classification.py +1 -2
- mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
- mteb/tasks/classification/ory/odia_news_classification.py +1 -2
- mteb/tasks/classification/pol/polish_classification.py +3 -6
- mteb/tasks/classification/ron/moroco.py +1 -2
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
- mteb/tasks/classification/rus/georeview_classification.py +1 -2
- mteb/tasks/classification/rus/headline_classification.py +1 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
- mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
- mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
- mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
- mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
- mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
- mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
- mteb/tasks/classification/swe/dalaj_classification.py +1 -2
- mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
- mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
- mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
- mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
- mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
- mteb/tasks/classification/tur/__init__.py +4 -0
- mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
- mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
- mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
- mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
- mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
- mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
- mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
- mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
- mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
- mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
- mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
- mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
- mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
- mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
- mteb/tasks/classification/zho/cmteb_classification.py +5 -10
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
- mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
- mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
- mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
- mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
- mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
- mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
- mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
- mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
- mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
- mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
- mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
- mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
- mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
- mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
- mteb/tasks/pair_classification/rus/__init__.py +2 -2
- mteb/tasks/pair_classification/rus/terra.py +51 -25
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
- mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
- mteb/tasks/reranking/jpn/__init__.py +9 -1
- mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
- mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
- mteb/tasks/reranking/multilingual/__init__.py +2 -0
- mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
- mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
- mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
- mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
- mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
- mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
- mteb/tasks/retrieval/code/code_rag.py +12 -12
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
- mteb/tasks/retrieval/eng/__init__.py +2 -0
- mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
- mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
- mteb/tasks/retrieval/jpn/__init__.py +8 -0
- mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
- mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
- mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
- mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
- mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
- mteb/tasks/retrieval/kor/__init__.py +16 -1
- mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
- mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
- mteb/tasks/retrieval/multilingual/__init__.py +24 -0
- mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
- mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
- mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
- mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
- mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +389 -0
- mteb/tasks/retrieval/nld/__init__.py +8 -4
- mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
- mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
- mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
- mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
- mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
- mteb/tasks/retrieval/nob/norquad.py +2 -2
- mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
- mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
- mteb/tasks/retrieval/vie/__init__.py +14 -6
- mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +40 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +40 -5
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +40 -7
- mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +40 -6
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +49 -5
- mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +40 -5
- mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
- mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
- mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
- mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
- mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
- mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
- mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
- mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
- mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
- mteb/types/__init__.py +2 -0
- mteb/types/_encoder_io.py +19 -2
- mteb/types/_result.py +2 -1
- mteb/types/statistics.py +9 -3
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/METADATA +25 -8
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/RECORD +525 -438
- mteb/models/model_implementations/mxbai_models.py +0 -102
- mteb/models/model_implementations/nb_sbert.py +0 -25
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import Any, Protocol
|
|
2
|
+
from typing import Any, Protocol, cast
|
|
3
3
|
|
|
4
4
|
import numpy as np
|
|
5
5
|
from datasets import Dataset
|
|
6
6
|
from torch.utils.data import DataLoader
|
|
7
7
|
from typing_extensions import Self
|
|
8
8
|
|
|
9
|
-
from mteb._create_dataloaders import
|
|
9
|
+
from mteb._create_dataloaders import create_dataloader
|
|
10
10
|
from mteb.abstasks.task_metadata import TaskMetadata
|
|
11
11
|
from mteb.models import EncoderProtocol
|
|
12
|
-
from mteb.types import BatchedInput
|
|
12
|
+
from mteb.types import Array, BatchedInput, EncodeKwargs
|
|
13
13
|
|
|
14
14
|
from .evaluator import Evaluator
|
|
15
15
|
|
|
@@ -17,11 +17,11 @@ logger = logging.getLogger(__name__)
|
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
class SklearnModelProtocol(Protocol):
|
|
20
|
-
def fit(self, X:
|
|
21
|
-
def predict(self, X:
|
|
20
|
+
def fit(self, X: Array, y: np.ndarray | list[int]) -> None: ... # noqa: N803
|
|
21
|
+
def predict(self, X: Array) -> np.ndarray: ... # noqa: N803
|
|
22
22
|
def get_params(self) -> dict[str, Any]: ...
|
|
23
|
-
def set_params(self, **kwargs: dict[str, Any]) -> Self: ...
|
|
24
|
-
def score(self, X:
|
|
23
|
+
def set_params(self, random_state: int, **kwargs: dict[str, Any]) -> Self: ...
|
|
24
|
+
def score(self, X: Array, y: np.ndarray | list[int]) -> float: ... # noqa: N803
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
class SklearnEvaluator(Evaluator):
|
|
@@ -50,42 +50,29 @@ class SklearnEvaluator(Evaluator):
|
|
|
50
50
|
self.evaluator_model = evaluator_model
|
|
51
51
|
|
|
52
52
|
def create_dataloaders(
|
|
53
|
-
self,
|
|
53
|
+
self, encode_kwargs: EncodeKwargs
|
|
54
54
|
) -> tuple[DataLoader[BatchedInput], DataLoader[BatchedInput]]:
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
if self.values_column_name != "text":
|
|
68
|
-
self.train_dataset = self.train_dataset.rename_column(
|
|
69
|
-
self.values_column_name, "text"
|
|
70
|
-
)
|
|
71
|
-
self.eval_dataset = self.eval_dataset.rename_column(
|
|
72
|
-
self.values_column_name, "text"
|
|
73
|
-
)
|
|
74
|
-
dataloader_train = DataLoader(self.train_dataset)
|
|
75
|
-
dataloader_test = DataLoader(self.eval_dataset)
|
|
76
|
-
else:
|
|
77
|
-
raise ValueError(
|
|
78
|
-
"ClassificationEvaluator only supports image and text modalities."
|
|
79
|
-
)
|
|
55
|
+
dataloader_train = create_dataloader(
|
|
56
|
+
self.train_dataset,
|
|
57
|
+
self.task_metadata,
|
|
58
|
+
input_column=self.values_column_name,
|
|
59
|
+
**encode_kwargs,
|
|
60
|
+
)
|
|
61
|
+
dataloader_test = create_dataloader(
|
|
62
|
+
self.eval_dataset,
|
|
63
|
+
self.task_metadata,
|
|
64
|
+
input_column=self.values_column_name,
|
|
65
|
+
**encode_kwargs,
|
|
66
|
+
)
|
|
80
67
|
return dataloader_train, dataloader_test
|
|
81
68
|
|
|
82
69
|
def __call__( # type: ignore[override]
|
|
83
70
|
self,
|
|
84
71
|
model: EncoderProtocol,
|
|
85
72
|
*,
|
|
86
|
-
encode_kwargs:
|
|
87
|
-
test_cache:
|
|
88
|
-
) -> tuple[np.ndarray,
|
|
73
|
+
encode_kwargs: EncodeKwargs,
|
|
74
|
+
test_cache: Array | None = None,
|
|
75
|
+
) -> tuple[np.ndarray, Array]:
|
|
89
76
|
"""Classification evaluation by training a sklearn classifier on the embeddings of the training set and evaluating on the embeddings of the test set.
|
|
90
77
|
|
|
91
78
|
Args:
|
|
@@ -98,7 +85,7 @@ class SklearnEvaluator(Evaluator):
|
|
|
98
85
|
|
|
99
86
|
"""
|
|
100
87
|
dataloader_train, dataloader_test = self.create_dataloaders(
|
|
101
|
-
|
|
88
|
+
encode_kwargs=encode_kwargs,
|
|
102
89
|
)
|
|
103
90
|
|
|
104
91
|
logger.info("Running - Encoding samples...")
|
|
@@ -117,6 +104,7 @@ class SklearnEvaluator(Evaluator):
|
|
|
117
104
|
hf_subset=self.hf_subset,
|
|
118
105
|
**encode_kwargs,
|
|
119
106
|
)
|
|
107
|
+
test_cache = cast(Array, test_cache)
|
|
120
108
|
|
|
121
109
|
logger.info("Running - Fitting classifier...")
|
|
122
110
|
y_train = self.train_dataset[self.label_column_name]
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import Any
|
|
3
2
|
|
|
4
|
-
import numpy as np
|
|
5
3
|
import torch
|
|
6
4
|
from datasets import Dataset
|
|
7
5
|
from tqdm.auto import tqdm
|
|
@@ -10,6 +8,7 @@ from mteb._create_dataloaders import _create_dataloader_from_texts
|
|
|
10
8
|
from mteb._evaluators.evaluator import Evaluator
|
|
11
9
|
from mteb.abstasks.task_metadata import TaskMetadata
|
|
12
10
|
from mteb.models import EncoderProtocol
|
|
11
|
+
from mteb.types import Array, EncodeKwargs
|
|
13
12
|
|
|
14
13
|
logger = logging.getLogger(__name__)
|
|
15
14
|
|
|
@@ -33,7 +32,10 @@ class BitextMiningEvaluator(Evaluator):
|
|
|
33
32
|
self.task_metadata = task_metadata
|
|
34
33
|
|
|
35
34
|
def __call__(
|
|
36
|
-
self,
|
|
35
|
+
self,
|
|
36
|
+
model: EncoderProtocol,
|
|
37
|
+
*,
|
|
38
|
+
encode_kwargs: EncodeKwargs,
|
|
37
39
|
) -> dict[str, list[dict[str, float]]]:
|
|
38
40
|
pair_elements = {p for pair in self.pairs for p in pair}
|
|
39
41
|
if isinstance(self.sentences, Dataset):
|
|
@@ -46,7 +48,10 @@ class BitextMiningEvaluator(Evaluator):
|
|
|
46
48
|
|
|
47
49
|
embeddings = {}
|
|
48
50
|
for sub in tqdm(subsets):
|
|
49
|
-
dataloader = _create_dataloader_from_texts(
|
|
51
|
+
dataloader = _create_dataloader_from_texts(
|
|
52
|
+
self.sentences[sub],
|
|
53
|
+
**encode_kwargs,
|
|
54
|
+
)
|
|
50
55
|
embeddings[sub] = model.encode(
|
|
51
56
|
dataloader,
|
|
52
57
|
task_metadata=self.task_metadata,
|
|
@@ -66,11 +71,11 @@ class BitextMiningEvaluator(Evaluator):
|
|
|
66
71
|
|
|
67
72
|
def _similarity_search(
|
|
68
73
|
self,
|
|
69
|
-
query_embeddings:
|
|
70
|
-
corpus_embeddings:
|
|
74
|
+
query_embeddings: Array,
|
|
75
|
+
corpus_embeddings: Array,
|
|
71
76
|
model: EncoderProtocol,
|
|
72
77
|
query_chunk_size: int = 100,
|
|
73
|
-
corpus_chunk_size: int =
|
|
78
|
+
corpus_chunk_size: int = 500_000,
|
|
74
79
|
) -> list[dict[str, float]]:
|
|
75
80
|
"""This function performs a cosine similarity search between a list of query embeddings and a list of corpus embeddings.
|
|
76
81
|
|
|
@@ -101,13 +106,15 @@ class BitextMiningEvaluator(Evaluator):
|
|
|
101
106
|
):
|
|
102
107
|
query_embeddings = query_embeddings.to(corpus_embeddings.device)
|
|
103
108
|
|
|
104
|
-
queries_result_list
|
|
109
|
+
queries_result_list: list[list[dict[str, float]]] = [
|
|
110
|
+
[] for _ in range(len(query_embeddings))
|
|
111
|
+
]
|
|
105
112
|
|
|
106
113
|
for query_start_idx in range(0, len(query_embeddings), query_chunk_size):
|
|
107
114
|
# Iterate over chunks of the corpus
|
|
108
115
|
for corpus_start_idx in range(0, len(corpus_embeddings), corpus_chunk_size):
|
|
109
116
|
# Compute cosine similarities
|
|
110
|
-
similarity_scores = model.similarity(
|
|
117
|
+
similarity_scores = model.similarity(
|
|
111
118
|
query_embeddings[
|
|
112
119
|
query_start_idx : query_start_idx + query_chunk_size
|
|
113
120
|
],
|
|
@@ -117,15 +124,17 @@ class BitextMiningEvaluator(Evaluator):
|
|
|
117
124
|
)
|
|
118
125
|
|
|
119
126
|
# Get top-k scores
|
|
120
|
-
|
|
121
|
-
torch.
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
127
|
+
cos_scores_top_k_values_tensor, cos_scores_top_k_idx_tensor = (
|
|
128
|
+
torch.topk(
|
|
129
|
+
torch.tensor(similarity_scores),
|
|
130
|
+
1,
|
|
131
|
+
dim=1,
|
|
132
|
+
largest=True,
|
|
133
|
+
sorted=False,
|
|
134
|
+
)
|
|
126
135
|
)
|
|
127
|
-
cos_scores_top_k_values =
|
|
128
|
-
cos_scores_top_k_idx =
|
|
136
|
+
cos_scores_top_k_values = cos_scores_top_k_values_tensor.cpu().tolist()
|
|
137
|
+
cos_scores_top_k_idx = cos_scores_top_k_idx_tensor.cpu().tolist()
|
|
129
138
|
|
|
130
139
|
for query_itr in range(len(similarity_scores)):
|
|
131
140
|
for sub_corpus_id, score in zip(
|
|
@@ -138,11 +147,14 @@ class BitextMiningEvaluator(Evaluator):
|
|
|
138
147
|
{"corpus_id": corpus_id, "score": score}
|
|
139
148
|
)
|
|
140
149
|
|
|
150
|
+
result_queries_list: list[dict[str, float]] = [
|
|
151
|
+
{} for _ in range(len(query_embeddings))
|
|
152
|
+
]
|
|
141
153
|
# Sort and strip to top_k results
|
|
142
154
|
for idx in range(len(queries_result_list)):
|
|
143
155
|
queries_result_list[idx] = sorted(
|
|
144
156
|
queries_result_list[idx], key=lambda x: x["score"], reverse=True
|
|
145
157
|
)
|
|
146
|
-
|
|
158
|
+
result_queries_list[idx] = queries_result_list[idx][0]
|
|
147
159
|
|
|
148
|
-
return
|
|
160
|
+
return result_queries_list
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import sys
|
|
3
|
-
from typing import
|
|
3
|
+
from typing import TypedDict
|
|
4
4
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
import torch
|
|
@@ -12,6 +12,7 @@ from mteb._evaluators.evaluator import Evaluator
|
|
|
12
12
|
from mteb.abstasks.task_metadata import TaskMetadata
|
|
13
13
|
from mteb.models import EncoderProtocol
|
|
14
14
|
from mteb.similarity_functions import cos_sim, dot_score
|
|
15
|
+
from mteb.types import EncodeKwargs
|
|
15
16
|
|
|
16
17
|
# if later than python 3.13 use typing module
|
|
17
18
|
if sys.version_info >= (3, 13):
|
|
@@ -94,7 +95,7 @@ class SummarizationEvaluator(Evaluator):
|
|
|
94
95
|
self,
|
|
95
96
|
model: EncoderProtocol,
|
|
96
97
|
*,
|
|
97
|
-
encode_kwargs:
|
|
98
|
+
encode_kwargs: EncodeKwargs,
|
|
98
99
|
) -> SummarizationDistances:
|
|
99
100
|
# Get the human & machine summaries for the text in one go for all
|
|
100
101
|
human_lens = [len(human_summaries) for human_summaries in self.human_summaries]
|
|
@@ -109,7 +110,8 @@ class SummarizationEvaluator(Evaluator):
|
|
|
109
110
|
summary
|
|
110
111
|
for human_summaries in self.human_summaries
|
|
111
112
|
for summary in human_summaries
|
|
112
|
-
]
|
|
113
|
+
],
|
|
114
|
+
**encode_kwargs,
|
|
113
115
|
),
|
|
114
116
|
task_metadata=self.task_metadata,
|
|
115
117
|
hf_subset=self.hf_subset,
|
|
@@ -124,7 +126,8 @@ class SummarizationEvaluator(Evaluator):
|
|
|
124
126
|
summary
|
|
125
127
|
for machine_summaries in self.machine_summaries
|
|
126
128
|
for summary in machine_summaries
|
|
127
|
-
]
|
|
129
|
+
],
|
|
130
|
+
**encode_kwargs,
|
|
128
131
|
),
|
|
129
132
|
task_metadata=self.task_metadata,
|
|
130
133
|
hf_subset=self.hf_subset,
|
|
@@ -133,10 +136,10 @@ class SummarizationEvaluator(Evaluator):
|
|
|
133
136
|
)
|
|
134
137
|
|
|
135
138
|
# Split the embeddings into the original human & machine summaries
|
|
136
|
-
|
|
139
|
+
embs_human_summaries_all_split = np.split(
|
|
137
140
|
embs_human_summaries_all, np.cumsum(human_lens)[:-1]
|
|
138
141
|
)
|
|
139
|
-
|
|
142
|
+
embs_machine_summaries_all_split = np.split(
|
|
140
143
|
embs_machine_summaries_all, np.cumsum(machine_lens)[:-1]
|
|
141
144
|
)
|
|
142
145
|
|
|
@@ -146,7 +149,9 @@ class SummarizationEvaluator(Evaluator):
|
|
|
146
149
|
all_human_scores = []
|
|
147
150
|
|
|
148
151
|
for i, (embs_human_summaries, embs_machine_summaries) in tqdm(
|
|
149
|
-
enumerate(
|
|
152
|
+
enumerate(
|
|
153
|
+
zip(embs_human_summaries_all_split, embs_machine_summaries_all_split)
|
|
154
|
+
),
|
|
150
155
|
desc="Scoring",
|
|
151
156
|
total=len(self.human_summaries),
|
|
152
157
|
):
|
|
@@ -162,7 +167,7 @@ class SummarizationEvaluator(Evaluator):
|
|
|
162
167
|
dot_scores = dot_score(emb_machine_summary, embs_human_summaries)
|
|
163
168
|
|
|
164
169
|
_sim_score = [
|
|
165
|
-
float(model.similarity(emb_machine_summary, emb_human_summary))
|
|
170
|
+
float(model.similarity(emb_machine_summary, emb_human_summary))
|
|
166
171
|
for emb_human_summary in embs_human_summaries
|
|
167
172
|
]
|
|
168
173
|
sim_score = torch.tensor(_sim_score)
|
|
@@ -214,17 +219,19 @@ class SummarizationEvaluator(Evaluator):
|
|
|
214
219
|
strict=True,
|
|
215
220
|
):
|
|
216
221
|
cosine_spearman_scores.append(
|
|
217
|
-
spearmanr(human_scores, cosine_pred_scores).statistic
|
|
222
|
+
float(spearmanr(human_scores, cosine_pred_scores).statistic)
|
|
218
223
|
)
|
|
219
224
|
cosine_pearson_scores.append(
|
|
220
|
-
pearsonr(human_scores, cosine_pred_scores).statistic
|
|
225
|
+
float(pearsonr(human_scores, cosine_pred_scores).statistic)
|
|
221
226
|
)
|
|
222
227
|
dot_spearman_scores.append(
|
|
223
|
-
spearmanr(human_scores, dot_pred_scores).statistic
|
|
228
|
+
float(spearmanr(human_scores, dot_pred_scores).statistic)
|
|
224
229
|
)
|
|
225
|
-
dot_pearson_scores.append(
|
|
226
|
-
|
|
227
|
-
|
|
230
|
+
dot_pearson_scores.append(
|
|
231
|
+
float(pearsonr(human_scores, dot_pred_scores).statistic)
|
|
232
|
+
)
|
|
233
|
+
spearman_scores.append(float(spearmanr(human_scores, sim_scores).statistic))
|
|
234
|
+
pearson_scores.append(float(pearsonr(human_scores, sim_scores).statistic))
|
|
228
235
|
|
|
229
236
|
return SummarizationMetrics(
|
|
230
237
|
pearson=float(np.mean(pearson_scores)),
|
|
@@ -271,10 +278,10 @@ class DeprecatedSummarizationEvaluator(SummarizationEvaluator):
|
|
|
271
278
|
pearson_scores.append(pearsonr(human_scores, sim_scores))
|
|
272
279
|
|
|
273
280
|
return SummarizationMetrics(
|
|
274
|
-
pearson=float(np.mean(pearson_scores)),
|
|
275
|
-
spearman=float(np.mean(spearman_scores)),
|
|
276
|
-
cosine_spearman=float(np.mean(cosine_spearman_scores)),
|
|
277
|
-
cosine_pearson=float(np.mean(cosine_pearson_scores)),
|
|
278
|
-
dot_pearson=float(np.mean(dot_pearson_scores)),
|
|
279
|
-
dot_spearman=float(np.mean(dot_spearman_scores)),
|
|
281
|
+
pearson=float(np.mean(pearson_scores)), # type: ignore[arg-type]
|
|
282
|
+
spearman=float(np.mean(spearman_scores)), # type: ignore[arg-type]
|
|
283
|
+
cosine_spearman=float(np.mean(cosine_spearman_scores)), # type: ignore[arg-type]
|
|
284
|
+
cosine_pearson=float(np.mean(cosine_pearson_scores)), # type: ignore[arg-type]
|
|
285
|
+
dot_pearson=float(np.mean(dot_pearson_scores)), # type: ignore[arg-type]
|
|
286
|
+
dot_spearman=float(np.mean(dot_spearman_scores)), # type: ignore[arg-type]
|
|
280
287
|
)
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import Any
|
|
3
2
|
|
|
4
3
|
from datasets import Dataset
|
|
5
4
|
|
|
@@ -10,7 +9,7 @@ from mteb._create_dataloaders import (
|
|
|
10
9
|
from mteb.abstasks.task_metadata import TaskMetadata
|
|
11
10
|
from mteb.models import EncoderProtocol
|
|
12
11
|
from mteb.similarity_functions import similarity
|
|
13
|
-
from mteb.types import Array
|
|
12
|
+
from mteb.types import Array, EncodeKwargs
|
|
14
13
|
|
|
15
14
|
from .evaluator import Evaluator
|
|
16
15
|
|
|
@@ -38,18 +37,21 @@ class ZeroShotClassificationEvaluator(Evaluator):
|
|
|
38
37
|
self.hf_subset = hf_subset
|
|
39
38
|
|
|
40
39
|
def __call__(
|
|
41
|
-
self,
|
|
40
|
+
self,
|
|
41
|
+
model: EncoderProtocol,
|
|
42
|
+
*,
|
|
43
|
+
encode_kwargs: EncodeKwargs,
|
|
42
44
|
) -> Array:
|
|
43
45
|
dataloader = create_dataloader(
|
|
44
46
|
self.dataset,
|
|
45
|
-
batch_size=encode_kwargs["batch_size"],
|
|
46
47
|
input_column=self.input_column_name,
|
|
47
48
|
task_metadata=self.task_metadata,
|
|
49
|
+
**encode_kwargs,
|
|
48
50
|
)
|
|
49
51
|
|
|
50
52
|
logger.info("Running zero-shot classification - Encoding labels...")
|
|
51
53
|
text_label_embeddings = model.encode(
|
|
52
|
-
_create_dataloader_from_texts(self.candidate_labels),
|
|
54
|
+
_create_dataloader_from_texts(self.candidate_labels, **encode_kwargs),
|
|
53
55
|
task_metadata=self.task_metadata,
|
|
54
56
|
hf_subset=self.hf_subset,
|
|
55
57
|
hf_split=self.hf_split,
|
|
File without changes
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
"""Simplified version of https://gist.github.com/AlexeyVatolin/ea3adc21aa7a767603ff393b22085adc from https://github.com/embeddings-benchmark/mteb/pull/2900"""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
import datasets
|
|
6
|
+
import pandas as pd
|
|
7
|
+
from datasets import Dataset, DatasetDict
|
|
8
|
+
|
|
9
|
+
from mteb import TaskMetadata
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def deduplicate(dataset: Dataset, input_column: str) -> Dataset:
    """Drop rows whose text was already seen earlier in the dataset.

    Texts are compared after stripping surrounding whitespace, so two rows
    that differ only in leading/trailing whitespace count as duplicates.

    Args:
        dataset: Dataset to deduplicate.
        input_column: Name of the column holding the text to compare.

    Returns:
        A selection of ``dataset`` holding the first row for every distinct
        stripped text, in the original row order.
    """
    seen: set[str] = set()
    kept_rows: list[int] = []
    for row_idx, raw_text in enumerate(dataset[input_column]):
        stripped = raw_text.strip()
        if stripped in seen:
            continue
        seen.add(stripped)
        kept_rows.append(row_idx)

    total = len(dataset)
    logger.info(f"[deduplicate] removed={total - len(kept_rows)}/{total}")
    return dataset.select(kept_rows)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def filter_empty(dataset: Dataset, input_column: str) -> Dataset:
    """Keep only the rows whose text is non-empty once whitespace is stripped.

    Args:
        dataset: Dataset to filter.
        input_column: Name of the column holding the text to inspect.

    Returns:
        The filtered dataset.
    """
    n_before = len(dataset)

    def _has_content(example) -> bool:
        return example[input_column].strip() != ""

    kept = dataset.filter(_has_content)
    logger.info(f"[filter_empty] removed={n_before - len(kept)}/{n_before}")
    return kept
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def filter_train_leakage(
    train_dataset: Dataset, test_dataset: Dataset, input_column: str
) -> Dataset:
    """Drop test rows whose text also occurs in the training data.

    Texts are matched exactly as stored (no stripping or case folding).

    Args:
        train_dataset: Dataset whose texts must not reappear in the test data.
        test_dataset: Dataset to filter.
        input_column: Name of the text column, shared by both datasets.

    Returns:
        A selection of ``test_dataset`` without the leaked rows.
    """
    seen_in_train = set(train_dataset[input_column])
    n_before = len(test_dataset)

    kept_rows = []
    for row_idx, text in enumerate(test_dataset[input_column]):
        if text in seen_in_train:
            continue
        kept_rows.append(row_idx)

    logger.info(f"[filter_train_leakage] removed={n_before - len(kept_rows)}/{n_before}")
    return test_dataset.select(kept_rows)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def filter_unclear_label(
    dataset_dict: DatasetDict, input_column: str, label_column: str
) -> DatasetDict:
    """Drop every example whose text is labelled inconsistently.

    Texts are compared after stripping and lower-casing, and conflicts are
    detected across all splits together: if the same normalized text carries
    more than one distinct label anywhere, all of its rows are removed from
    every split.

    Args:
        dataset_dict: Splits to scan and filter.
        input_column: Name of the text column.
        label_column: Name of the label column. Labels that are not
            str/int/float are converted to tuples so they can be hashed.

    Returns:
        A new ``DatasetDict`` with the same split names and the conflicting
        rows removed.
    """

    def _normalize(text: str) -> str:
        return text.strip().lower()

    logger.debug("[filter_controversial] scanning dataset for label conflicts...")

    # normalized text -> every distinct label observed for it
    labels_by_text: dict[str, set[str | tuple[str, ...]]] = {}
    for split_ds in dataset_dict.values():
        texts = split_ds[input_column]
        labels = split_ds[label_column]
        for text, label in zip(texts, labels):
            if isinstance(label, (str, int, float)):
                hashable_label = label
            else:
                hashable_label = tuple(label)
            seen_labels = labels_by_text.setdefault(_normalize(text), set())
            seen_labels.add(hashable_label)  # type: ignore[arg-type]

    conflicting = {
        text for text, seen_labels in labels_by_text.items() if len(seen_labels) > 1
    }
    logger.info(f"[filter_controversial] Removing {len(conflicting)} conflicting texts")

    def _is_unambiguous(example) -> bool:
        return _normalize(example[input_column]) not in conflicting

    cleaned = {}
    for split_name, split_ds in dataset_dict.items():
        n_before = len(split_ds)
        kept = split_ds.filter(_is_unambiguous)
        logger.debug(
            f"[filter_controversial:{split_name}] removed={n_before - len(kept)}/{n_before}"
        )
        cleaned[split_name] = kept

    return DatasetDict(cleaned)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def filter_short(dataset: Dataset, input_column: str, min_words: int = 3) -> Dataset:
    """Keep only the rows whose text has at least ``min_words`` words.

    Words are counted by splitting the stripped text on whitespace.

    Args:
        dataset: Dataset to filter.
        input_column: Name of the column holding the text to inspect.
        min_words: Minimum number of whitespace-separated tokens to keep a row.

    Returns:
        The filtered dataset.
    """
    n_before = len(dataset)

    def _long_enough(example) -> bool:
        word_count = len(example[input_column].strip().split())
        return word_count >= min_words

    kept = dataset.filter(_long_enough)
    logger.debug(f"[filter_short] removed={n_before - len(kept)}/{n_before}")
    return kept
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def split_train_test(
    ds: DatasetDict,
    metadata: TaskMetadata,
    train_split: str,
    label_column: str,
) -> DatasetDict:
    """Carve a stratified test split out of the train split when needed.

    The split is only performed when the task evaluates solely on its train
    split (``metadata.eval_splits`` names ``train_split`` and nothing else)
    and that split is present in ``ds``. Otherwise ``ds`` is returned as is.

    When the split is performed:

    * the label column is cast to ``ClassLabel`` (required for stratifying),
    * labels that occur only once are dropped, since they cannot be placed
      in both halves of a stratified split,
    * up to 2048 rows (at most half of the remaining rows) become ``"test"``,
    * ``metadata.eval_splits`` is overwritten **in place** with ``["test"]``,
      and ``ds[train_split]`` is modified in place on the way.

    Args:
        ds: Splits of a single dataset/subset.
        metadata: Task metadata; its ``eval_splits`` may be rewritten.
        train_split: Name of the training split.
        label_column: Name of the label column to stratify on.

    Returns:
        ``ds`` unchanged, or a new ``DatasetDict`` holding only
        ``train_split`` and ``"test"``.
    """
    eval_splits = metadata.eval_splits
    # eval_splits is a list of split names (this function assigns ["test"] to
    # it below), so comparing it to the bare string alone would never match.
    evaluates_on_train_only = (
        eval_splits == train_split or list(eval_splits) == [train_split]
    )
    if train_split not in ds or not evaluates_on_train_only:
        return ds

    before = len(ds[train_split])
    logger.info(
        f"[split_train_test] eval_splits == train_split; performing split on {before} examples"
    )

    unique_labels = set(ds[train_split][label_column])
    try:
        # Sorted names keep the label -> class-id mapping (and therefore the
        # seeded stratified split) identical from one run to the next.
        class_names = sorted(unique_labels)
    except TypeError:
        # Labels of mixed, unorderable types: fall back to set order.
        class_names = list(unique_labels)
    ds[train_split] = ds[train_split].cast_column(
        label_column,
        datasets.ClassLabel(names=class_names),
    )

    label_counts = pd.Series(ds[train_split][label_column]).value_counts()
    one_sample_labels = set(label_counts[label_counts == 1].index.tolist())

    if one_sample_labels:
        logger.info(
            f"[split_train_test] Removing {len(one_sample_labels)} labels with only one instance"
        )
        ds[train_split] = ds[train_split].filter(
            lambda x: x[label_column] not in one_sample_labels
        )

    # Size the test split from the rows that are actually left after the
    # singleton labels were removed.
    remaining = len(ds[train_split])
    splits = ds[train_split].train_test_split(
        test_size=min(2048, remaining // 2), seed=42, stratify_by_column=label_column
    )
    # train_test_split always names its halves "train" and "test", whatever
    # the caller's train split is called.
    ds = DatasetDict({train_split: splits["train"], "test": splits["test"]})
    metadata.eval_splits = ["test"]
    logger.info(
        f"[split_train_test] Train size={len(ds[train_split])}, Test size={len(ds['test'])}"
    )

    return ds
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from datasets import DatasetDict
|
|
4
|
+
|
|
5
|
+
from mteb import TaskMetadata
|
|
6
|
+
from mteb.abstasks import AbsTaskClassification
|
|
7
|
+
from mteb.abstasks._data_filter.filters import (
|
|
8
|
+
deduplicate,
|
|
9
|
+
filter_empty,
|
|
10
|
+
filter_short,
|
|
11
|
+
filter_train_leakage,
|
|
12
|
+
filter_unclear_label,
|
|
13
|
+
split_train_test,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def clean_dataset(
    ds: DatasetDict,
    metadata: TaskMetadata,
    train_split: str,
    input_column: str,
    label_column: str,
    subset: str | None = None,
) -> DatasetDict:
    """Apply the full cleaning pipeline to the splits of one dataset/subset.

    Steps, in order:

    1. On the train split and every eval split: drop empty texts, drop
       duplicate texts and, unless one of the languages is in the skip list,
       drop texts with too few words.
    2. ``split_train_test``: may carve a test split out of the train split
       and rewrite ``metadata.eval_splits``.
    3. Remove from every eval split the texts that also occur in the train
       split.
    4. Remove texts that carry conflicting labels.

    ``ds`` is modified in place along the way.

    Args:
        ds: Splits to clean.
        metadata: Task metadata (``eval_langs`` and ``eval_splits`` are read;
            ``eval_splits`` may be rewritten by step 2).
        train_split: Name of the training split.
        input_column: Name of the text column.
        label_column: Name of the label column.
        subset: Subset name used to look up the languages when
            ``metadata.eval_langs`` is a per-subset mapping.

    Returns:
        The cleaned splits.
    """
    logger.info("[clean_dataset] Starting dataset cleaning pipeline...")

    transforms = [
        ("filter_empty", filter_empty),
        ("deduplicate", deduplicate),
    ]

    # The word-count filter is skipped for these languages -- presumably
    # because whitespace-based word counts are unreliable for them.
    skip_cjk_codes = {"zho", "jpn", "tha", "mya", "cmn"}
    eval_langs = metadata.eval_langs
    if isinstance(eval_langs, dict):
        if subset:
            cur_langs = eval_langs[subset]
        else:
            # No subset given: look at the languages of every subset rather
            # than at the mapping's keys, which are subset names.
            cur_langs = [lang for langs in eval_langs.values() for lang in langs]
    else:
        cur_langs = eval_langs

    apply_short = not any(lang.split("-")[0] in skip_cjk_codes for lang in cur_langs)
    if apply_short:
        logger.info("[clean_dataset] Applying short-text filter")
        transforms.append(("filter_short", filter_short))
    else:
        logger.info("[clean_dataset] Skipping short-text filter")

    # dict.fromkeys de-duplicates while keeping order, so the train split is
    # not cleaned a second time when it is also listed as an eval split.
    for split in dict.fromkeys([train_split, *metadata.eval_splits]):
        if split not in ds:
            logger.warning(f"[clean_dataset] Split '{split}' missing; skipping.")
            continue

        for name, fn in transforms:
            before = len(ds[split])
            ds[split] = fn(ds[split], input_column=input_column)
            logger.info(
                f"[clean_dataset:{split}] {name} removed={before - len(ds[split])}"
            )

    ds = split_train_test(ds, metadata, train_split, label_column)

    # Re-read eval_splits here: split_train_test may have replaced it.
    for split in metadata.eval_splits:
        if split == train_split:
            continue
        if split not in ds or train_split not in ds:
            # Same tolerance as the first loop instead of a KeyError.
            logger.warning(
                f"[clean_dataset] Split '{split}' or '{train_split}' missing; "
                "skipping leakage filter."
            )
            continue
        before = len(ds[split])
        ds[split] = filter_train_leakage(ds[train_split], ds[split], input_column)
        logger.info(
            f"[clean_dataset:{split}] leakage_removed={before - len(ds[split])}"
        )

    ds = filter_unclear_label(ds, input_column=input_column, label_column=label_column)

    logger.info("[clean_dataset] Cleaning pipeline complete.")
    return ds
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def process_classification(
    task: AbsTaskClassification,
) -> DatasetDict | dict[str, DatasetDict]:
    """Run the cleaning pipeline on a classification task's dataset(s).

    The task's data is loaded first if necessary. A task whose dataset is a
    single ``DatasetDict`` is cleaned directly; otherwise the dataset is
    treated as a mapping of subset name to ``DatasetDict`` and each subset is
    cleaned separately.

    ``clean_dataset`` may rewrite ``task.metadata.eval_splits`` (its
    ``split_train_test`` step assigns ``["test"]``). Because the metadata is
    shared by all subsets, the declared eval splits are restored before each
    subset so that every subset is cleaned against the same configuration.
    If any subset caused a rewrite, the rewritten value is what the metadata
    holds on return.

    Args:
        task: Classification task to process.

    Returns:
        The cleaned ``DatasetDict``, or a dict of cleaned ``DatasetDict``
        objects keyed by subset name.

    Raises:
        ValueError: If the task has no dataset after loading.
    """
    if not task.data_loaded:
        task.load_data()

    if task.dataset is None:
        raise ValueError("Task dataset is None.")

    if isinstance(task.dataset, DatasetDict):
        return clean_dataset(
            task.dataset,
            task.metadata,
            task.train_split,
            task.input_column_name,
            task.label_column_name,
            subset=None,
        )

    declared_eval_splits = list(task.metadata.eval_splits)
    rewritten_eval_splits = None

    new_ds = {}
    for subset in task.dataset:
        # Undo a rewrite made while cleaning the previous subset.
        if list(task.metadata.eval_splits) != declared_eval_splits:
            task.metadata.eval_splits = list(declared_eval_splits)

        new_ds[subset] = clean_dataset(
            task.dataset[subset],
            task.metadata,
            task.train_split,
            task.input_column_name,
            task.label_column_name,
            subset=subset,
        )

        if list(task.metadata.eval_splits) != declared_eval_splits:
            rewritten_eval_splits = list(task.metadata.eval_splits)

    if rewritten_eval_splits is not None:
        task.metadata.eval_splits = rewritten_eval_splits

    return new_ds
|