mteb 2.0.5__py3-none-any.whl → 2.1.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/__init__.py +10 -1
- mteb/_create_dataloaders.py +8 -3
- mteb/_evaluators/any_sts_evaluator.py +14 -12
- mteb/_evaluators/clustering_evaluator.py +1 -1
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +2 -2
- mteb/_evaluators/pair_classification_evaluator.py +3 -1
- mteb/_evaluators/retrieval_metrics.py +0 -9
- mteb/_evaluators/sklearn_evaluator.py +15 -28
- mteb/_evaluators/text/bitext_mining_evaluator.py +4 -1
- mteb/_evaluators/text/summarization_evaluator.py +4 -2
- mteb/_evaluators/zeroshot_classification_evaluator.py +2 -2
- mteb/abstasks/_stratification.py +1 -1
- mteb/abstasks/abstask.py +6 -1
- mteb/abstasks/clustering.py +1 -1
- mteb/abstasks/dataset_card_template.md +1 -1
- mteb/abstasks/multilabel_classification.py +2 -2
- mteb/abstasks/retrieval.py +2 -1
- mteb/abstasks/retrieval_dataset_loaders.py +1 -1
- mteb/abstasks/task_metadata.py +2 -1
- mteb/benchmarks/_create_table.py +1 -3
- mteb/benchmarks/benchmark.py +18 -1
- mteb/benchmarks/benchmarks/__init__.py +4 -0
- mteb/benchmarks/benchmarks/benchmarks.py +125 -16
- mteb/benchmarks/get_benchmark.py +3 -1
- mteb/cache.py +7 -3
- mteb/descriptive_stats/Classification/DutchColaClassification.json +54 -0
- mteb/descriptive_stats/Classification/DutchGovernmentBiasClassification.json +54 -0
- mteb/descriptive_stats/Classification/DutchNewsArticlesClassification.json +90 -0
- mteb/descriptive_stats/Classification/DutchSarcasticHeadlinesClassification.json +54 -0
- mteb/descriptive_stats/Classification/IconclassClassification.json +96 -0
- mteb/descriptive_stats/Classification/OpenTenderClassification.json +222 -0
- mteb/descriptive_stats/Classification/VaccinChatNLClassification.json +1068 -0
- mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringP2P.json +45 -0
- mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringS2S.json +45 -0
- mteb/descriptive_stats/Clustering/IconclassClusteringS2S.json +48 -0
- mteb/descriptive_stats/Clustering/OpenTenderClusteringP2P.json +111 -0
- mteb/descriptive_stats/Clustering/OpenTenderClusteringS2S.json +111 -0
- mteb/descriptive_stats/Clustering/VABBClusteringP2P.json +60 -0
- mteb/descriptive_stats/Clustering/VABBClusteringS2S.json +60 -0
- mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XFlickr30kCoT2IRetrieval.json +243 -153
- mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XM3600T2IRetrieval.json +999 -629
- mteb/descriptive_stats/Image/Any2AnyRetrieval/OVENIT2TRetrieval.json +33 -17
- mteb/descriptive_stats/Image/DocumentUnderstanding/MIRACLVisionRetrieval.json +574 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
- mteb/descriptive_stats/MultilabelClassification/CovidDisinformationNLMultiLabelClassification.json +84 -0
- mteb/descriptive_stats/MultilabelClassification/VABBMultiLabelClassification.json +156 -0
- mteb/descriptive_stats/PairClassification/SICKNLPairClassification.json +35 -0
- mteb/descriptive_stats/PairClassification/XLWICNLPairClassification.json +35 -0
- mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/ClimateFEVERHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/DBPediaHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/DutchNewsArticlesRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/FEVERHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/HotpotQAHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/LegalQANLRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/OpenTenderRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/QuoraRetrievalHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/RiaNewsRetrievalHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/VABBRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/VDRMultilingualRetrieval.json +184 -0
- mteb/descriptive_stats/Retrieval/WinoGrande.json +14 -14
- mteb/descriptive_stats/Retrieval/bBSARDNLRetrieval.json +30 -0
- mteb/descriptive_stats/STS/SICK-NL-STS.json +28 -0
- mteb/evaluate.py +26 -6
- mteb/languages/check_language_code.py +11 -3
- mteb/languages/language_scripts.py +4 -0
- mteb/leaderboard/app.py +5 -3
- mteb/leaderboard/benchmark_selector.py +4 -2
- mteb/leaderboard/text_segments.py +1 -1
- mteb/models/cache_wrappers/cache_wrapper.py +1 -1
- mteb/models/instruct_wrapper.py +3 -0
- mteb/models/model_implementations/align_models.py +6 -0
- mteb/models/model_implementations/andersborges.py +51 -0
- mteb/models/model_implementations/ara_models.py +7 -0
- mteb/models/model_implementations/b1ade_models.py +1 -1
- mteb/models/model_implementations/bge_models.py +1 -3
- mteb/models/model_implementations/blip2_models.py +9 -0
- mteb/models/model_implementations/blip_models.py +19 -0
- mteb/models/model_implementations/bmretriever_models.py +1 -1
- mteb/models/model_implementations/cadet_models.py +8 -0
- mteb/models/model_implementations/cde_models.py +12 -0
- mteb/models/model_implementations/codefuse_models.py +15 -0
- mteb/models/model_implementations/codesage_models.py +12 -0
- mteb/models/model_implementations/cohere_models.py +1 -1
- mteb/models/model_implementations/colqwen_models.py +57 -0
- mteb/models/model_implementations/emillykkejensen_models.py +70 -0
- mteb/models/model_implementations/gme_v_models.py +2 -2
- mteb/models/model_implementations/ibm_granite_models.py +1 -1
- mteb/models/model_implementations/inf_models.py +3 -3
- mteb/models/model_implementations/jasper_models.py +253 -2
- mteb/models/model_implementations/jina_models.py +12 -2
- mteb/models/model_implementations/kalm_models.py +159 -25
- mteb/models/model_implementations/llm2vec_models.py +1 -1
- mteb/models/model_implementations/misc_models.py +8 -2
- mteb/models/model_implementations/moco_models.py +9 -0
- mteb/models/model_implementations/mxbai_models.py +1 -1
- mteb/models/model_implementations/openclip_models.py +16 -0
- mteb/models/model_implementations/piccolo_models.py +6 -0
- mteb/models/model_implementations/rasgaard_models.py +33 -0
- mteb/models/model_implementations/reasonir_model.py +1 -1
- mteb/models/model_implementations/salesforce_models.py +1 -1
- mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -1
- mteb/models/model_implementations/spartan8806_atles_champion.py +26 -0
- mteb/models/model_implementations/tarka_models.py +374 -0
- mteb/models/model_implementations/voyage_models.py +6 -7
- mteb/models/model_implementations/voyage_v.py +10 -9
- mteb/models/model_implementations/yuan_models.py +33 -0
- mteb/models/search_wrappers.py +6 -5
- mteb/results/task_result.py +19 -17
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +1 -5
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
- mteb/tasks/classification/ara/ajgt.py +1 -2
- mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
- mteb/tasks/classification/dan/angry_tweets_classification.py +2 -3
- mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
- mteb/tasks/classification/dan/dk_hate_classification.py +1 -2
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
- mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
- mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
- mteb/tasks/classification/eng/arxiv_classification.py +1 -2
- mteb/tasks/classification/eng/banking77_classification.py +1 -2
- mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
- mteb/tasks/classification/eng/emotion_classification.py +1 -2
- mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
- mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
- mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
- mteb/tasks/classification/eng/imdb_classification.py +1 -2
- mteb/tasks/classification/eng/legal_bench_classification.py +15 -121
- mteb/tasks/classification/eng/news_classification.py +1 -2
- mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
- mteb/tasks/classification/eng/patent_classification.py +1 -2
- mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
- mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
- mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
- mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
- mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
- mteb/tasks/classification/eng/ucf101_classification.py +1 -5
- mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
- mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
- mteb/tasks/classification/est/estonian_valence.py +1 -2
- mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
- mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
- mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
- mteb/tasks/classification/fra/french_book_reviews.py +1 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
- mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +1 -2
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
- mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
- mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
- mteb/tasks/classification/jpn/wrime_classification.py +1 -2
- mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
- mteb/tasks/classification/kor/klue_tc.py +1 -2
- mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +1 -2
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
- mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
- mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
- mteb/tasks/classification/multilingual/scala_classification.py +1 -2
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
- mteb/tasks/classification/mya/myanmar_news.py +2 -3
- mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
- mteb/tasks/classification/nld/__init__.py +16 -0
- mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
- mteb/tasks/classification/nld/dutch_cola_classification.py +41 -0
- mteb/tasks/classification/nld/dutch_government_bias_classification.py +40 -0
- mteb/tasks/classification/nld/dutch_news_articles_classification.py +33 -0
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +39 -0
- mteb/tasks/classification/nld/iconclass_classification.py +44 -0
- mteb/tasks/classification/nld/open_tender_classification.py +41 -0
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +49 -0
- mteb/tasks/classification/nob/no_rec_classification.py +1 -2
- mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
- mteb/tasks/classification/ory/odia_news_classification.py +1 -2
- mteb/tasks/classification/pol/polish_classification.py +3 -6
- mteb/tasks/classification/ron/moroco.py +1 -2
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
- mteb/tasks/classification/rus/georeview_classification.py +1 -2
- mteb/tasks/classification/rus/headline_classification.py +1 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
- mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
- mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
- mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
- mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
- mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
- mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
- mteb/tasks/classification/swe/dalaj_classification.py +1 -2
- mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
- mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
- mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
- mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
- mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
- mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
- mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
- mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
- mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
- mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
- mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
- mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
- mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
- mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
- mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
- mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
- mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
- mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
- mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
- mteb/tasks/classification/zho/cmteb_classification.py +5 -10
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
- mteb/tasks/clustering/__init__.py +1 -0
- mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
- mteb/tasks/clustering/nld/__init__.py +17 -0
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +40 -0
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +40 -0
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +50 -0
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +54 -0
- mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +44 -0
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +54 -0
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +54 -0
- mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
- mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
- mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
- mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
- mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
- mteb/tasks/multilabel_classification/__init__.py +1 -0
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
- mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
- mteb/tasks/multilabel_classification/nld/__init__.py +9 -0
- mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +91 -0
- mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +47 -0
- mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
- mteb/tasks/pair_classification/__init__.py +1 -0
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
- mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
- mteb/tasks/pair_classification/multilingual/indic_xnli_pair_classification.py +9 -8
- mteb/tasks/pair_classification/nld/__init__.py +7 -0
- mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +39 -0
- mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +44 -0
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
- mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
- mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
- mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
- mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
- mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
- mteb/tasks/retrieval/code/code_rag.py +8 -8
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
- mteb/tasks/retrieval/eng/__init__.py +18 -4
- mteb/tasks/retrieval/eng/climate_fever_retrieval.py +68 -77
- mteb/tasks/retrieval/eng/dbpedia_retrieval.py +55 -50
- mteb/tasks/retrieval/eng/fever_retrieval.py +62 -67
- mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +0 -4
- mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +0 -4
- mteb/tasks/retrieval/eng/hotpot_qa_retrieval.py +57 -67
- mteb/tasks/retrieval/eng/legal_summarization_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
- mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +0 -3
- mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +0 -2
- mteb/tasks/retrieval/eng/oven_it2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/quora_retrieval.py +51 -46
- mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +0 -4
- mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +0 -4
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +0 -2
- mteb/tasks/retrieval/eng/wino_grande_retrieval.py +1 -1
- mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
- mteb/tasks/retrieval/jpn/ja_gov_faqs_retrieval.py +1 -1
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
- mteb/tasks/retrieval/multilingual/__init__.py +22 -0
- mteb/tasks/retrieval/multilingual/belebele_retrieval.py +6 -5
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +0 -2
- mteb/tasks/retrieval/multilingual/miracl_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +2 -9
- mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
- mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
- mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +0 -2
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +399 -0
- mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +0 -2
- mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +6 -5
- mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +3 -4
- mteb/tasks/retrieval/nld/__init__.py +18 -4
- mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
- mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +44 -0
- mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +33 -0
- mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +42 -0
- mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
- mteb/tasks/retrieval/nld/open_tender_retrieval.py +41 -0
- mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
- mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
- mteb/tasks/retrieval/nld/vabb_retrieval.py +44 -0
- mteb/tasks/retrieval/nob/norquad.py +2 -2
- mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
- mteb/tasks/retrieval/rus/__init__.py +11 -2
- mteb/tasks/retrieval/rus/ria_news_retrieval.py +48 -44
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
- mteb/tasks/retrieval/tur/tur_hist_quad.py +2 -2
- mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +1 -7
- mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
- mteb/tasks/sts/__init__.py +1 -0
- mteb/tasks/sts/nld/__init__.py +5 -0
- mteb/tasks/sts/nld/sick_nl_sts.py +42 -0
- mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
- mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
- mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
- mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
- mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
- mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
- mteb-2.1.19.dist-info/METADATA +253 -0
- {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/RECORD +398 -330
- mteb/descriptive_stats/Classification/PersianTextTone.json +0 -56
- mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchCount.json +0 -37
- mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDepth.json +0 -25
- mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDistance.json +0 -25
- mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchRelation.json +0 -25
- mteb/descriptive_stats/Image/VisualSTS/STS12VisualSTS.json +0 -20
- mteb/descriptive_stats/Image/VisualSTS/STS13VisualSTS.json +0 -20
- mteb/descriptive_stats/Image/VisualSTS/STS14VisualSTS.json +0 -20
- mteb/descriptive_stats/Image/VisualSTS/STS15VisualSTS.json +0 -20
- mteb/descriptive_stats/Image/VisualSTS/STS16VisualSTS.json +0 -20
- mteb/descriptive_stats/Image/VisualSTS/STS17MultilingualVisualSTS.json +0 -220
- mteb/descriptive_stats/Image/VisualSTS/STSBenchmarkMultilingualVisualSTS.json +0 -402
- mteb/descriptive_stats/Reranking/InstructIR.json +0 -31
- mteb-2.0.5.dist-info/METADATA +0 -455
- {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/WHEEL +0 -0
- {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/entry_points.txt +0 -0
- {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/top_level.txt +0 -0
Selected diff hunks:

```diff
@@ -20,9 +20,7 @@ def _load_data(path: str, splits: str, revision: str | None = None):
         corpus[split] = split_dataset.map(
             lambda x, idx: {
                 "id": f"corpus-{split}-{idx}",
-                # "text": None,
                 "modality": "text",
-                "image": None,
             },
             with_indices=True,
             remove_columns=[
```
```diff
@@ -37,9 +35,7 @@ def _load_data(path: str, splits: str, revision: str | None = None):
         queries[split] = split_dataset.map(
             lambda x, idx: {
                 "id": f"query-{split}-{idx}",
-                "text": None,
                 "modality": "image",
-                # "image": None,
             },
             with_indices=True,
             remove_columns=[
```
```diff
@@ -20,9 +20,7 @@ def _load_data(path: str, splits: str, revision: str | None = None):
         corpus[split] = split_dataset.map(
             lambda x, idx: {
                 "id": f"corpus-{split}-{idx}",
-                "text": None,
                 "modality": "image",
-                # "image": None,
             },
             with_indices=True,
             remove_columns=[
```
```diff
@@ -37,9 +35,7 @@ def _load_data(path: str, splits: str, revision: str | None = None):
         queries[split] = split_dataset.map(
             lambda x, idx: {
                 "id": f"query-{split}-{idx}",
-                # "text": None,
                 "modality": "text",
-                "image": None,
             },
             with_indices=True,
             remove_columns=[
```
```diff
@@ -24,7 +24,6 @@ def _load_data(
         lambda x: {
             "id": f"query-{split}-{x['query-id']}",
             "text": x["query"],
-            "image": None,
             "modality": "text",
         },
         remove_columns=["query-id", "query"],
```
```diff
@@ -40,7 +39,6 @@ def _load_data(
     corpus_ds = corpus_ds.map(
         lambda x: {
             "id": f"corpus-{split}-{x['corpus-id']}",
-            "text": None,
             "modality": "image",
         },
         remove_columns=["corpus-id"],
```
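A pattern that repeats across these loader hunks (and again further down): the `map` calls stop emitting explicit `"text": None` / `"image": None` placeholder columns and keep only the populated `modality` field. A minimal sketch of the resulting behaviour, using an invented two-row corpus (the real loaders run the same `datasets.Dataset.map` shown in the hunks):

```python
from datasets import Dataset

# Invented miniature corpus standing in for the real image datasets.
raw = Dataset.from_dict({"image": ["img_0.png", "img_1.png"]})

# 2.1.x style: only populated fields are emitted, so the schema no longer
# carries an all-null "text" column alongside the image data.
corpus = raw.map(
    lambda x, idx: {
        "id": f"corpus-test-{idx}",
        "modality": "image",
    },
    with_indices=True,
)

print(corpus.column_names)  # ['image', 'id', 'modality'] (no null "text" column)
```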
```diff
@@ -9,7 +9,7 @@ class WinoGrande(AbsTaskRetrieval):
         reference="https://winogrande.allenai.org/",
         dataset={
             "path": "mteb/WinoGrande",
-            "revision": "
+            "revision": "4dec9c5666e9f84702ac614363db6d96a68bc6de",
         },
         type="Retrieval",
         category="t2t",
```
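The hunk above pins the WinoGrande dataset to an exact commit hash, which fixes the evaluated snapshot across runs. A hedged illustration of what such a pin resolves to (mteb performs the equivalent load internally; the dataset may additionally require selecting a subset):

```python
from datasets import load_dataset

# Loading the pinned snapshot directly (illustrative; requires network access).
ds = load_dataset(
    "mteb/WinoGrande",
    revision="4dec9c5666e9f84702ac614363db6d96a68bc6de",
)
```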
```diff
@@ -9,10 +9,7 @@ class JaCWIRRetrieval(AbsTaskRetrieval):
 
     metadata = TaskMetadata(
         name="JaCWIRRetrieval",
-        description="
-        5000 question texts and approximately 500k web page titles and web page introductions or summaries
-        (meta descriptions, etc.). The question texts are created based on one of the 500k web pages,
-        and that data is used as a positive example for the question text.""",
+        description="JaCWIR is a small-scale Japanese information retrieval evaluation dataset consisting of 5000 question texts and approximately 500k web page titles and web page introductions or summaries (meta descriptions, etc.). The question texts are created based on one of the 500k web pages, and that data is used as a positive example for the question text.",
         reference="https://huggingface.co/datasets/hotchpotch/JaCWIR",
         dataset={
             "path": "mteb/JaCWIRRetrieval",
```
```diff
@@ -7,7 +7,7 @@ class JaGovFaqsRetrieval(AbsTaskRetrieval):
 
     metadata = TaskMetadata(
         name="JaGovFaqsRetrieval",
-        description="JaGovFaqs is a dataset consisting of FAQs
+        description="JaGovFaqs is a dataset consisting of FAQs manually extracted from the website of Japanese bureaus. The dataset consists of 22k FAQs, where the queries (questions) and corpus (answers) have been shuffled, and the goal is to match the answer with the question.",
         reference="https://github.com/sbintuitions/JMTEB",
         dataset={
             "path": "mteb/JaGovFaqsRetrieval",
```
```diff
@@ -46,10 +46,17 @@ class GeorgianFAQRetrieval(AbsTaskRetrieval):
             split=_EVAL_SPLIT,
             revision=self.metadata.dataset["revision"],
         )
-
-
-        }
-
+
+        question_ids = {}
+        answer_ids = {}
+
+        for row in data:
+            question = row["question"]
+            answer = row["answer"]
+            if question not in question_ids:
+                question_ids[question] = len(question_ids)
+            if answer not in answer_ids:
+                answer_ids[answer] = len(answer_ids)
 
         for row in data:
             question = row["question"]
```
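The removed lines are truncated in this rendering, but the replacement is clear: ids are now assigned by first occurrence while iterating the data, so duplicate questions or answers collapse to a single id and the numbering is deterministic across runs. A self-contained sketch of the pattern with invented rows:

```python
rows = [
    {"question": "How do I renew a passport?", "answer": "Apply online."},
    {"question": "How do I renew a passport?", "answer": "Visit an office."},
    {"question": "What is the fee?", "answer": "Apply online."},
]

question_ids: dict[str, int] = {}
answer_ids: dict[str, int] = {}
for row in rows:
    # Duplicates map to one id; ids follow first-seen order.
    if row["question"] not in question_ids:
        question_ids[row["question"]] = len(question_ids)
    if row["answer"] not in answer_ids:
        answer_ids[row["answer"]] = len(answer_ids)

assert question_ids == {"How do I renew a passport?": 0, "What is the fee?": 1}
assert answer_ids == {"Apply online.": 0, "Visit an office.": 1}
```

The same first-occurrence indexing appears in the BelebeleRetrieval and PublicHealthQA hunks below.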
```diff
@@ -81,6 +81,18 @@ from .vidore2_bench_retrieval import (
     Vidore2ESGReportsHLRetrieval,
     Vidore2ESGReportsRetrieval,
 )
+from .vidore3_bench_retrieval import (
+    Vidore3ComputerScienceRetrieval,
+    Vidore3EnergyRetrieval,
+    Vidore3FinanceEnRetrieval,
+    Vidore3FinanceFrRetrieval,
+    Vidore3HrRetrieval,
+    Vidore3IndustrialRetrieval,
+    Vidore3NuclearRetrieval,
+    Vidore3PharmaceuticalsRetrieval,
+    Vidore3PhysicsRetrieval,
+    Vidore3TelecomRetrieval,
+)
 from .web_faq_retrieval import WebFAQRetrieval
 from .wikipedia_retrieval_multilingual import WikipediaRetrievalMultilingual
 from .wit_t2i_retrieval import WITT2IRetrieval
```
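With the ViDoRe v3 tasks exported, they can be selected like any other task. A hypothetical usage sketch, following mteb 2.x's get_tasks/get_model/evaluate flow (the task name comes from the import list above; the model name is illustrative and would need to be a vision-retrieval model registered in mteb):

```python
import mteb

# Task name taken from the new vidore3 exports; model name is illustrative.
tasks = mteb.get_tasks(tasks=["Vidore3EnergyRetrieval"])
model = mteb.get_model("vidore/colqwen2-v1.0")
results = mteb.evaluate(model, tasks=tasks)
```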
```diff
@@ -161,6 +173,16 @@ __all__ = [
     "Vidore2ESGReportsHLRetrieval",
     "Vidore2ESGReportsRetrieval",
     "Vidore2EconomicsReportsRetrieval",
+    "Vidore3ComputerScienceRetrieval",
+    "Vidore3EnergyRetrieval",
+    "Vidore3FinanceEnRetrieval",
+    "Vidore3FinanceFrRetrieval",
+    "Vidore3HrRetrieval",
+    "Vidore3IndustrialRetrieval",
+    "Vidore3NuclearRetrieval",
+    "Vidore3PharmaceuticalsRetrieval",
+    "Vidore3PhysicsRetrieval",
+    "Vidore3TelecomRetrieval",
     "WITT2IRetrieval",
     "WebFAQRetrieval",
     "WikipediaRetrievalMultilingual",
```
```diff
@@ -132,7 +132,7 @@ _LANGUAGES = [
 
 
 def get_lang_pairs() -> dict[str, list[str]]:
-    # add pairs with same
+    # add pairs with same language as the source and target
     # add pairs with english as source or target
     lang_pairs = {}
    for x in _LANGUAGES:
```
```diff
@@ -230,10 +230,11 @@ class BelebeleRetrieval(AbsTaskRetrieval):
         ds_corpus = self.dataset[lang_corpus]
         ds_question = self.dataset[lang_question]
 
-        question_ids = {
-
-
-
+        question_ids = {}
+        for row in ds_question:
+            question = row["question"]
+            if question not in question_ids:
+                question_ids[question] = len(question_ids)
 
         link_to_context_id = {}
         context_idx = 0
```
```diff
@@ -72,7 +72,6 @@ def _load_single_language(
         lambda x: {
             "id": f"query-{split}-{x['query-id']}",
             "text": x["query"],
-            "image": None,
             "modality": "text",
         },
         remove_columns=["query-id", "query"],
```
```diff
@@ -87,7 +86,6 @@ def _load_single_language(
     corpus_ds = corpus_ds.map(
         lambda x: {
             "id": f"corpus-{split}-{x['corpus-id']}",
-            "text": None,
             "modality": "image",
         },
         remove_columns=["corpus-id"],
```
```diff
@@ -92,7 +92,7 @@ class MIRACLRetrievalHardNegativesV2(AbsTaskRetrieval):
             "MIRACL (Multilingual Information Retrieval Across a Continuum of Languages) is a multilingual retrieval "
             "dataset that focuses on search across 18 different languages. The hard negative version has been "
             "created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct."
-            "V2 uses a more appropriate prompt rather than the default prompt for retrieval."
+            "V2 uses a more appropriate prompt rather than the default prompt for retrieval. You can get more information on the effect of different prompt in the [PR](https://github.com/embeddings-benchmark/mteb/pull/3469#issuecomment-3436467106)"
         ),
         dataset={
             "path": "mteb/MIRACLRetrievalHardNegatives",
```
```diff
@@ -30,7 +30,7 @@ _LANGUAGES = {
 def _load_miracl_data(
     path: str,
     langs: list,
-    splits: str,
+    splits: list[str],
     revision: str | None = None,
 ):
     corpus = {lang: dict.fromkeys(splits) for lang in langs}
```
```diff
@@ -65,9 +65,7 @@ def _load_miracl_data(
     images_data = images_data.map(
         lambda x: {
             "id": imgid2docid[str(x["file_name"])],
-            # "modality": "text",
             "modality": "image",
-            "text": None,
         },
         remove_columns=["file_name"],
     )
```
```diff
@@ -86,7 +84,6 @@ def _load_miracl_data(
             "id": str(x["_id"]),
             "text": x["text"],
             "modality": "text",
-            "image": None,
         },
         remove_columns=["_id"],
     )
```
```diff
@@ -108,10 +105,6 @@ def _load_miracl_data(
                 relevant_docs[lang][split][query_id] = {}
             relevant_docs[lang][split][query_id][doc_id] = score
 
-    corpus = datasets.DatasetDict(corpus)
-    queries = datasets.DatasetDict(queries)
-    relevant_docs = datasets.DatasetDict(relevant_docs)
-
     return corpus, queries, relevant_docs
 
 
```
```diff
@@ -156,7 +149,7 @@ class MIRACLVisionRetrieval(AbsTaskRetrieval):
 
         self.corpus, self.queries, self.relevant_docs = _load_miracl_data(
             path=self.metadata.dataset["path"],
-            splits=self.metadata.eval_splits
+            splits=self.metadata.eval_splits,
             langs=self.hf_subsets,
             revision=self.metadata.dataset["revision"],
         )
```
```diff
@@ -34,8 +34,7 @@ _EVAL_LANGS = {
 class MKQARetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="MKQARetrieval",
-        description="
-        For each query we collect new passage-independent answers. These queries and answers are then human translated into 25 Non-English languages.""",
+        description="Multilingual Knowledge Questions & Answers (MKQA)contains 10,000 queries sampled from the Google Natural Questions dataset. For each query we collect new passage-independent answers. These queries and answers are then human translated into 25 Non-English languages.",
         reference="https://github.com/apple/ml-mkqa",
         dataset={
             "path": "mteb/MKQARetrieval",
```
```diff
@@ -75,10 +75,7 @@ _EVAL_LANGS = extend_lang_pairs()
 class MLQARetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="MLQARetrieval",
-        description="
-        MLQA consists of over 5K extractive QA instances (12K in English) in SQuAD format in seven languages - English, Arabic,
-        German, Spanish, Hindi, Vietnamese and Simplified Chinese. MLQA is highly parallel, with QA instances parallel between
-        4 different languages on average.""",
+        description="MLQA (MultiLingual Question Answering) is a benchmark dataset for evaluating cross-lingual question answering performance. MLQA consists of over 5K extractive QA instances (12K in English) in SQuAD format in seven languages - English, Arabic, German, Spanish, Hindi, Vietnamese and Simplified Chinese. MLQA is highly parallel, with QA instances parallel between 4 different languages on average.",
         reference="https://huggingface.co/datasets/mlqa",
         dataset={
             "path": "mteb/MLQARetrieval",
```
```diff
@@ -21,8 +21,7 @@ _LANGUAGES = {
 class MultiLongDocRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="MultiLongDocRetrieval",
-        description="
-        It is constructed by sampling lengthy articles from Wikipedia, Wudao and mC4 datasets and randomly choose paragraphs from them. Then we use GPT-3.5 to generate questions based on these paragraphs. The generated question and the sampled article constitute a new text pair to the dataset.""",
+        description="Multi Long Doc Retrieval (MLDR) 'is curated by the multilingual articles from Wikipedia, Wudao and mC4 (see Table 7), and NarrativeQA (Kocˇisky ́ et al., 2018; Gu ̈nther et al., 2023), which is only for English.' (Chen et al., 2024). It is constructed by sampling lengthy articles from Wikipedia, Wudao and mC4 datasets and randomly choose paragraphs from them. Then we use GPT-3.5 to generate questions based on these paragraphs. The generated question and the sampled article constitute a new text pair to the dataset.",
         reference="https://arxiv.org/abs/2402.03216",  # also: https://huggingface.co/datasets/Shitao/MLDR
         dataset={
             "path": "mteb/MultiLongDocRetrieval",
```
```diff
@@ -32,10 +32,15 @@ def _load_publichealthqa_data(
             split=split,
             revision=revision,
         )
-
-
-        }
-
+
+        question_ids = {}
+        answer_ids = {}
+
+        for row in data:
+            if row["question"] is not None and row["question"] not in question_ids:
+                question_ids[row["question"]] = len(question_ids)
+            if row["answer"] is not None and row["answer"] not in answer_ids:
+                answer_ids[row["answer"]] = len(answer_ids)
 
         for row in data:
             if row["question"] is None or row["answer"] is None:
```
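This is the same first-occurrence id pattern as in the GeorgianFAQRetrieval hunk, extended with a `None` guard: ids are only assigned to non-missing texts, and the subsequent loop (visible in the trailing context) drops rows where either side is missing. A small sketch with invented rows:

```python
rows = [
    {"question": "What is COVID-19?", "answer": "A coronavirus disease."},
    {"question": None, "answer": "Orphan answer."},
]

question_ids: dict[str, int] = {}
answer_ids: dict[str, int] = {}
for row in rows:
    if row["question"] is not None and row["question"] not in question_ids:
        question_ids[row["question"]] = len(question_ids)
    if row["answer"] is not None and row["answer"] not in answer_ids:
        answer_ids[row["answer"]] = len(answer_ids)

assert question_ids == {"What is COVID-19?": 0}
# A non-missing answer still gets an id even when its question is None;
# the later filtering loop simply never references such rows.
assert answer_ids == {"A coronavirus disease.": 0, "Orphan answer.": 1}
```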
```diff
@@ -68,11 +68,7 @@ class RuSciBenchCiteRetrieval(AbsTaskRetrieval):
             "path": "mlsa-iai-msu-lab/ru_sci_bench_cite_retrieval",
             "revision": "6cb447d02f41b8b775d5d9df7faf472f44d2f1db",
         },
-        description="
-        Russia's largest electronic library of scientific publications. Given a query paper (title and abstract),
-        the goal is to retrieve papers that are directly cited by it from a larger corpus of papers.
-        The dataset for this task consists of 3,000 query papers, 15,000 relevant (cited) papers,
-        and 75,000 irrelevant papers. The task is available for both Russian and English scientific texts.""",
+        description="This task is focused on Direct Citation Prediction for scientific papers from eLibrary, Russia's largest electronic library of scientific publications. Given a query paper (title and abstract), the goal is to retrieve papers that are directly cited by it from a larger corpus of papers. The dataset for this task consists of 3,000 query papers, 15,000 relevant (cited) papers, and 75,000 irrelevant papers. The task is available for both Russian and English scientific texts.",
         reference="https://github.com/mlsa-iai-msu-lab/ru_sci_bench_mteb",
         type="Retrieval",
         category="t2t",
```
|
|
|
130
126
|
"path": "mlsa-iai-msu-lab/ru_sci_bench_cocite_retrieval",
|
|
131
127
|
"revision": "a5da47a245275669d2b6ddf8f96c5338dd2428b4",
|
|
132
128
|
},
|
|
133
|
-
description="
|
|
134
|
-
Russia's largest electronic library of scientific publications. Given a query paper (title and abstract),
|
|
135
|
-
the goal is to retrieve other papers that are co-cited with it. Two papers are considered co-cited
|
|
136
|
-
if they are both cited by at least 5 of the same other papers. Similar to the Direct Citation task,
|
|
137
|
-
this task employs a retrieval setup: for a given query paper, all other papers in the corpus that
|
|
138
|
-
are not co-cited with it are considered negative examples. The task is available for both Russian
|
|
139
|
-
and English scientific texts.""",
|
|
129
|
+
description="This task focuses on Co-citation Prediction for scientific papers from eLibrary, Russia's largest electronic library of scientific publications. Given a query paper (title and abstract), the goal is to retrieve other papers that are co-cited with it. Two papers are considered co-cited if they are both cited by at least 5 of the same other papers. Similar to the Direct Citation task, this task employs a retrieval setup: for a given query paper, all other papers in the corpus that are not co-cited with it are considered negative examples. The task is available for both Russian and English scientific texts.",
|
|
140
130
|
reference="https://github.com/mlsa-iai-msu-lab/ru_sci_bench_mteb",
|
|
141
131
|
type="Retrieval",
|
|
142
132
|
category="t2t",
|
|
```diff
@@ -37,7 +37,6 @@ def _load_data(
         lambda x: {
             "id": f"query-{split}-{x['query-id']}",
             "text": x["query"],
-            "image": None,
             "modality": "text",
         },
         remove_columns=["query-id", "query"],
```
```diff
@@ -52,7 +51,6 @@ def _load_data(
     corpus_ds = corpus_ds.map(
         lambda x: {
             "id": f"corpus-{split}-{x['corpus-id']}",
-            "text": None,
             "modality": "image",
         },
         remove_columns=["corpus-id"],
```