mteb 2.0.5__py3-none-any.whl → 2.1.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/__init__.py +10 -1
- mteb/_create_dataloaders.py +8 -3
- mteb/_evaluators/any_sts_evaluator.py +14 -12
- mteb/_evaluators/clustering_evaluator.py +1 -1
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +2 -2
- mteb/_evaluators/pair_classification_evaluator.py +3 -1
- mteb/_evaluators/retrieval_metrics.py +0 -9
- mteb/_evaluators/sklearn_evaluator.py +15 -28
- mteb/_evaluators/text/bitext_mining_evaluator.py +4 -1
- mteb/_evaluators/text/summarization_evaluator.py +4 -2
- mteb/_evaluators/zeroshot_classification_evaluator.py +2 -2
- mteb/abstasks/_stratification.py +1 -1
- mteb/abstasks/abstask.py +6 -1
- mteb/abstasks/clustering.py +1 -1
- mteb/abstasks/dataset_card_template.md +1 -1
- mteb/abstasks/multilabel_classification.py +2 -2
- mteb/abstasks/retrieval.py +2 -1
- mteb/abstasks/retrieval_dataset_loaders.py +1 -1
- mteb/abstasks/task_metadata.py +2 -1
- mteb/benchmarks/_create_table.py +1 -3
- mteb/benchmarks/benchmark.py +18 -1
- mteb/benchmarks/benchmarks/__init__.py +4 -0
- mteb/benchmarks/benchmarks/benchmarks.py +125 -16
- mteb/benchmarks/get_benchmark.py +3 -1
- mteb/cache.py +7 -3
- mteb/descriptive_stats/Classification/DutchColaClassification.json +54 -0
- mteb/descriptive_stats/Classification/DutchGovernmentBiasClassification.json +54 -0
- mteb/descriptive_stats/Classification/DutchNewsArticlesClassification.json +90 -0
- mteb/descriptive_stats/Classification/DutchSarcasticHeadlinesClassification.json +54 -0
- mteb/descriptive_stats/Classification/IconclassClassification.json +96 -0
- mteb/descriptive_stats/Classification/OpenTenderClassification.json +222 -0
- mteb/descriptive_stats/Classification/VaccinChatNLClassification.json +1068 -0
- mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringP2P.json +45 -0
- mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringS2S.json +45 -0
- mteb/descriptive_stats/Clustering/IconclassClusteringS2S.json +48 -0
- mteb/descriptive_stats/Clustering/OpenTenderClusteringP2P.json +111 -0
- mteb/descriptive_stats/Clustering/OpenTenderClusteringS2S.json +111 -0
- mteb/descriptive_stats/Clustering/VABBClusteringP2P.json +60 -0
- mteb/descriptive_stats/Clustering/VABBClusteringS2S.json +60 -0
- mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XFlickr30kCoT2IRetrieval.json +243 -153
- mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XM3600T2IRetrieval.json +999 -629
- mteb/descriptive_stats/Image/Any2AnyRetrieval/OVENIT2TRetrieval.json +33 -17
- mteb/descriptive_stats/Image/DocumentUnderstanding/MIRACLVisionRetrieval.json +574 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
- mteb/descriptive_stats/MultilabelClassification/CovidDisinformationNLMultiLabelClassification.json +84 -0
- mteb/descriptive_stats/MultilabelClassification/VABBMultiLabelClassification.json +156 -0
- mteb/descriptive_stats/PairClassification/SICKNLPairClassification.json +35 -0
- mteb/descriptive_stats/PairClassification/XLWICNLPairClassification.json +35 -0
- mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/ClimateFEVERHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/DBPediaHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/DutchNewsArticlesRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/FEVERHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/HotpotQAHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/LegalQANLRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/OpenTenderRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/QuoraRetrievalHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/RiaNewsRetrievalHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/VABBRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/VDRMultilingualRetrieval.json +184 -0
- mteb/descriptive_stats/Retrieval/WinoGrande.json +14 -14
- mteb/descriptive_stats/Retrieval/bBSARDNLRetrieval.json +30 -0
- mteb/descriptive_stats/STS/SICK-NL-STS.json +28 -0
- mteb/evaluate.py +26 -6
- mteb/languages/check_language_code.py +11 -3
- mteb/languages/language_scripts.py +4 -0
- mteb/leaderboard/app.py +5 -3
- mteb/leaderboard/benchmark_selector.py +4 -2
- mteb/leaderboard/text_segments.py +1 -1
- mteb/models/cache_wrappers/cache_wrapper.py +1 -1
- mteb/models/instruct_wrapper.py +3 -0
- mteb/models/model_implementations/align_models.py +6 -0
- mteb/models/model_implementations/andersborges.py +51 -0
- mteb/models/model_implementations/ara_models.py +7 -0
- mteb/models/model_implementations/b1ade_models.py +1 -1
- mteb/models/model_implementations/bge_models.py +1 -3
- mteb/models/model_implementations/blip2_models.py +9 -0
- mteb/models/model_implementations/blip_models.py +19 -0
- mteb/models/model_implementations/bmretriever_models.py +1 -1
- mteb/models/model_implementations/cadet_models.py +8 -0
- mteb/models/model_implementations/cde_models.py +12 -0
- mteb/models/model_implementations/codefuse_models.py +15 -0
- mteb/models/model_implementations/codesage_models.py +12 -0
- mteb/models/model_implementations/cohere_models.py +1 -1
- mteb/models/model_implementations/colqwen_models.py +57 -0
- mteb/models/model_implementations/emillykkejensen_models.py +70 -0
- mteb/models/model_implementations/gme_v_models.py +2 -2
- mteb/models/model_implementations/ibm_granite_models.py +1 -1
- mteb/models/model_implementations/inf_models.py +3 -3
- mteb/models/model_implementations/jasper_models.py +253 -2
- mteb/models/model_implementations/jina_models.py +12 -2
- mteb/models/model_implementations/kalm_models.py +159 -25
- mteb/models/model_implementations/llm2vec_models.py +1 -1
- mteb/models/model_implementations/misc_models.py +8 -2
- mteb/models/model_implementations/moco_models.py +9 -0
- mteb/models/model_implementations/mxbai_models.py +1 -1
- mteb/models/model_implementations/openclip_models.py +16 -0
- mteb/models/model_implementations/piccolo_models.py +6 -0
- mteb/models/model_implementations/rasgaard_models.py +33 -0
- mteb/models/model_implementations/reasonir_model.py +1 -1
- mteb/models/model_implementations/salesforce_models.py +1 -1
- mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -1
- mteb/models/model_implementations/spartan8806_atles_champion.py +26 -0
- mteb/models/model_implementations/tarka_models.py +374 -0
- mteb/models/model_implementations/voyage_models.py +6 -7
- mteb/models/model_implementations/voyage_v.py +10 -9
- mteb/models/model_implementations/yuan_models.py +33 -0
- mteb/models/search_wrappers.py +6 -5
- mteb/results/task_result.py +19 -17
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +1 -5
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
- mteb/tasks/classification/ara/ajgt.py +1 -2
- mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
- mteb/tasks/classification/dan/angry_tweets_classification.py +2 -3
- mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
- mteb/tasks/classification/dan/dk_hate_classification.py +1 -2
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
- mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
- mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
- mteb/tasks/classification/eng/arxiv_classification.py +1 -2
- mteb/tasks/classification/eng/banking77_classification.py +1 -2
- mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
- mteb/tasks/classification/eng/emotion_classification.py +1 -2
- mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
- mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
- mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
- mteb/tasks/classification/eng/imdb_classification.py +1 -2
- mteb/tasks/classification/eng/legal_bench_classification.py +15 -121
- mteb/tasks/classification/eng/news_classification.py +1 -2
- mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
- mteb/tasks/classification/eng/patent_classification.py +1 -2
- mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
- mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
- mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
- mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
- mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
- mteb/tasks/classification/eng/ucf101_classification.py +1 -5
- mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
- mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
- mteb/tasks/classification/est/estonian_valence.py +1 -2
- mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
- mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
- mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
- mteb/tasks/classification/fra/french_book_reviews.py +1 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
- mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +1 -2
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
- mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
- mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
- mteb/tasks/classification/jpn/wrime_classification.py +1 -2
- mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
- mteb/tasks/classification/kor/klue_tc.py +1 -2
- mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +1 -2
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
- mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
- mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
- mteb/tasks/classification/multilingual/scala_classification.py +1 -2
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
- mteb/tasks/classification/mya/myanmar_news.py +2 -3
- mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
- mteb/tasks/classification/nld/__init__.py +16 -0
- mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
- mteb/tasks/classification/nld/dutch_cola_classification.py +41 -0
- mteb/tasks/classification/nld/dutch_government_bias_classification.py +40 -0
- mteb/tasks/classification/nld/dutch_news_articles_classification.py +33 -0
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +39 -0
- mteb/tasks/classification/nld/iconclass_classification.py +44 -0
- mteb/tasks/classification/nld/open_tender_classification.py +41 -0
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +49 -0
- mteb/tasks/classification/nob/no_rec_classification.py +1 -2
- mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
- mteb/tasks/classification/ory/odia_news_classification.py +1 -2
- mteb/tasks/classification/pol/polish_classification.py +3 -6
- mteb/tasks/classification/ron/moroco.py +1 -2
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
- mteb/tasks/classification/rus/georeview_classification.py +1 -2
- mteb/tasks/classification/rus/headline_classification.py +1 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
- mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
- mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
- mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
- mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
- mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
- mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
- mteb/tasks/classification/swe/dalaj_classification.py +1 -2
- mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
- mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
- mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
- mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
- mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
- mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
- mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
- mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
- mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
- mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
- mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
- mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
- mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
- mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
- mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
- mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
- mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
- mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
- mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
- mteb/tasks/classification/zho/cmteb_classification.py +5 -10
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
- mteb/tasks/clustering/__init__.py +1 -0
- mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
- mteb/tasks/clustering/nld/__init__.py +17 -0
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +40 -0
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +40 -0
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +50 -0
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +54 -0
- mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +44 -0
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +54 -0
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +54 -0
- mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
- mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
- mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
- mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
- mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
- mteb/tasks/multilabel_classification/__init__.py +1 -0
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
- mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
- mteb/tasks/multilabel_classification/nld/__init__.py +9 -0
- mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +91 -0
- mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +47 -0
- mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
- mteb/tasks/pair_classification/__init__.py +1 -0
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
- mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
- mteb/tasks/pair_classification/multilingual/indic_xnli_pair_classification.py +9 -8
- mteb/tasks/pair_classification/nld/__init__.py +7 -0
- mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +39 -0
- mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +44 -0
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
- mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
- mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
- mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
- mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
- mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
- mteb/tasks/retrieval/code/code_rag.py +8 -8
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
- mteb/tasks/retrieval/eng/__init__.py +18 -4
- mteb/tasks/retrieval/eng/climate_fever_retrieval.py +68 -77
- mteb/tasks/retrieval/eng/dbpedia_retrieval.py +55 -50
- mteb/tasks/retrieval/eng/fever_retrieval.py +62 -67
- mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +0 -4
- mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +0 -4
- mteb/tasks/retrieval/eng/hotpot_qa_retrieval.py +57 -67
- mteb/tasks/retrieval/eng/legal_summarization_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
- mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +0 -3
- mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +0 -2
- mteb/tasks/retrieval/eng/oven_it2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/quora_retrieval.py +51 -46
- mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +0 -4
- mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +0 -4
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +0 -2
- mteb/tasks/retrieval/eng/wino_grande_retrieval.py +1 -1
- mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
- mteb/tasks/retrieval/jpn/ja_gov_faqs_retrieval.py +1 -1
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
- mteb/tasks/retrieval/multilingual/__init__.py +22 -0
- mteb/tasks/retrieval/multilingual/belebele_retrieval.py +6 -5
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +0 -2
- mteb/tasks/retrieval/multilingual/miracl_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +2 -9
- mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
- mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
- mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +0 -2
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +399 -0
- mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +0 -2
- mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +6 -5
- mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +3 -4
- mteb/tasks/retrieval/nld/__init__.py +18 -4
- mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
- mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +44 -0
- mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +33 -0
- mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +42 -0
- mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
- mteb/tasks/retrieval/nld/open_tender_retrieval.py +41 -0
- mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
- mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
- mteb/tasks/retrieval/nld/vabb_retrieval.py +44 -0
- mteb/tasks/retrieval/nob/norquad.py +2 -2
- mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
- mteb/tasks/retrieval/rus/__init__.py +11 -2
- mteb/tasks/retrieval/rus/ria_news_retrieval.py +48 -44
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
- mteb/tasks/retrieval/tur/tur_hist_quad.py +2 -2
- mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +1 -7
- mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
- mteb/tasks/sts/__init__.py +1 -0
- mteb/tasks/sts/nld/__init__.py +5 -0
- mteb/tasks/sts/nld/sick_nl_sts.py +42 -0
- mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
- mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
- mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
- mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
- mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
- mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
- mteb-2.1.19.dist-info/METADATA +253 -0
- {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/RECORD +398 -330
- mteb/descriptive_stats/Classification/PersianTextTone.json +0 -56
- mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchCount.json +0 -37
- mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDepth.json +0 -25
- mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDistance.json +0 -25
- mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchRelation.json +0 -25
- mteb/descriptive_stats/Image/VisualSTS/STS12VisualSTS.json +0 -20
- mteb/descriptive_stats/Image/VisualSTS/STS13VisualSTS.json +0 -20
- mteb/descriptive_stats/Image/VisualSTS/STS14VisualSTS.json +0 -20
- mteb/descriptive_stats/Image/VisualSTS/STS15VisualSTS.json +0 -20
- mteb/descriptive_stats/Image/VisualSTS/STS16VisualSTS.json +0 -20
- mteb/descriptive_stats/Image/VisualSTS/STS17MultilingualVisualSTS.json +0 -220
- mteb/descriptive_stats/Image/VisualSTS/STSBenchmarkMultilingualVisualSTS.json +0 -402
- mteb/descriptive_stats/Reranking/InstructIR.json +0 -31
- mteb-2.0.5.dist-info/METADATA +0 -455
- {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/WHEEL +0 -0
- {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/entry_points.txt +0 -0
- {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.0.5.dist-info → mteb-2.1.19.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
2
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class DutchNewsArticlesRetrieval(AbsTaskRetrieval):
|
|
6
|
+
metadata = TaskMetadata(
|
|
7
|
+
name="DutchNewsArticlesRetrieval",
|
|
8
|
+
description="This dataset contains all the articles published by the NOS as of the 1st of January 2010. The "
|
|
9
|
+
"data is obtained by scraping the NOS website. The NOS is one of the biggest (online) news "
|
|
10
|
+
"organizations in the Netherlands.",
|
|
11
|
+
reference="https://www.kaggle.com/datasets/maxscheijen/dutch-news-articles",
|
|
12
|
+
dataset={
|
|
13
|
+
"path": "clips/mteb-nl-news-articles-ret",
|
|
14
|
+
"revision": "c8042a86f3eb0d1fcec79a4a44ebf1eafe635462",
|
|
15
|
+
},
|
|
16
|
+
type="Retrieval",
|
|
17
|
+
category="t2t",
|
|
18
|
+
modalities=["text"],
|
|
19
|
+
eval_splits=["test"],
|
|
20
|
+
eval_langs=["nld-Latn"],
|
|
21
|
+
main_score="ndcg_at_10",
|
|
22
|
+
date=("2009-11-01", "2010-01-01"),
|
|
23
|
+
domains=["Written", "News"],
|
|
24
|
+
task_subtypes=["Article retrieval"],
|
|
25
|
+
license="cc-by-nc-sa-4.0",
|
|
26
|
+
annotations_creators="derived",
|
|
27
|
+
dialect=[],
|
|
28
|
+
sample_creation="found",
|
|
29
|
+
bibtex_citation="",
|
|
30
|
+
prompt={
|
|
31
|
+
"query": "Gegeven een titel, haal het nieuwsartikel op dat het beste bij de titel past"
|
|
32
|
+
},
|
|
33
|
+
)
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
2
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class LegalQANLRetrieval(AbsTaskRetrieval):
|
|
6
|
+
ignore_identical_ids = True
|
|
7
|
+
|
|
8
|
+
metadata = TaskMetadata(
|
|
9
|
+
name="LegalQANLRetrieval",
|
|
10
|
+
description="To this end, we create and publish a Dutch legal QA dataset, consisting of question-answer pairs "
|
|
11
|
+
"with attributions to Dutch law articles.",
|
|
12
|
+
reference="https://aclanthology.org/2024.nllp-1.12/",
|
|
13
|
+
dataset={
|
|
14
|
+
"path": "clips/mteb-nl-legalqa-pr",
|
|
15
|
+
"revision": "8f593522dfbe7ec07055ca9d38a700e7643d3882",
|
|
16
|
+
},
|
|
17
|
+
type="Retrieval",
|
|
18
|
+
category="t2t",
|
|
19
|
+
modalities=["text"],
|
|
20
|
+
eval_splits=["test"],
|
|
21
|
+
eval_langs=["nld-Latn"],
|
|
22
|
+
main_score="ndcg_at_10",
|
|
23
|
+
date=("2021-05-01", "2021-08-26"),
|
|
24
|
+
domains=["Legal", "Written"],
|
|
25
|
+
task_subtypes=[],
|
|
26
|
+
license="cc-by-nc-sa-4.0",
|
|
27
|
+
annotations_creators="expert-annotated",
|
|
28
|
+
dialect=[],
|
|
29
|
+
sample_creation="found",
|
|
30
|
+
bibtex_citation=r"""
|
|
31
|
+
@inproceedings{redelaar2024attributed,
|
|
32
|
+
author = {Redelaar, Felicia and Van Drie, Romy and Verberne, Suzan and De Boer, Maaike},
|
|
33
|
+
booktitle = {Proceedings of the natural legal language processing workshop 2024},
|
|
34
|
+
pages = {154--165},
|
|
35
|
+
title = {Attributed Question Answering for Preconditions in the Dutch Law},
|
|
36
|
+
year = {2024},
|
|
37
|
+
}
|
|
38
|
+
""",
|
|
39
|
+
prompt={
|
|
40
|
+
"query": "Gegeven een juridische vraag, haal documenten op die kunnen helpen bij het beantwoorden van de vraag"
|
|
41
|
+
},
|
|
42
|
+
)
|
|
@@ -1,31 +1,26 @@
|
|
|
1
1
|
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
2
2
|
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
license="cc-by-4.0",
|
|
25
|
-
annotations_creators="derived",
|
|
26
|
-
dialect=[],
|
|
27
|
-
sample_creation="machine-translated and verified", # manually checked a small subset
|
|
28
|
-
bibtex_citation=r"""
|
|
4
|
+
_nf_corpus_metadata = dict(
|
|
5
|
+
dataset={
|
|
6
|
+
"path": "clips/beir-nl-nfcorpus",
|
|
7
|
+
"revision": "942953e674fd0f619ff89897abb806dc3df5dd39",
|
|
8
|
+
},
|
|
9
|
+
reference="https://huggingface.co/datasets/clips/beir-nl-nfcorpus",
|
|
10
|
+
type="Retrieval",
|
|
11
|
+
category="t2t",
|
|
12
|
+
modalities=["text"],
|
|
13
|
+
eval_splits=["test"],
|
|
14
|
+
eval_langs=["nld-Latn"],
|
|
15
|
+
main_score="ndcg_at_10",
|
|
16
|
+
date=("2016-03-01", "2016-03-01"), # best guess: based on publication date
|
|
17
|
+
domains=["Medical", "Academic", "Written"],
|
|
18
|
+
task_subtypes=[],
|
|
19
|
+
license="cc-by-4.0",
|
|
20
|
+
annotations_creators="derived",
|
|
21
|
+
dialect=[],
|
|
22
|
+
sample_creation="machine-translated and verified", # manually checked a small subset
|
|
23
|
+
bibtex_citation=r"""
|
|
29
24
|
@misc{banar2024beirnlzeroshotinformationretrieval,
|
|
30
25
|
archiveprefix = {arXiv},
|
|
31
26
|
author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans},
|
|
@@ -36,5 +31,27 @@ class NFCorpusNL(AbsTaskRetrieval):
|
|
|
36
31
|
year = {2024},
|
|
37
32
|
}
|
|
38
33
|
""",
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class NFCorpusNL(AbsTaskRetrieval):
|
|
38
|
+
metadata = TaskMetadata(
|
|
39
|
+
name="NFCorpus-NL",
|
|
40
|
+
description="NFCorpus: A Full-Text Learning to Rank Dataset for Medical Information Retrieval. NFCorpus-NL is "
|
|
41
|
+
"a Dutch translation.",
|
|
39
42
|
adapted_from=["NFCorpus"],
|
|
43
|
+
**_nf_corpus_metadata,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class NFCorpusNLv2(AbsTaskRetrieval):
|
|
48
|
+
metadata = TaskMetadata(
|
|
49
|
+
name="NFCorpus-NL.v2",
|
|
50
|
+
description="NFCorpus: A Full-Text Learning to Rank Dataset for Medical Information Retrieval. NFCorpus-NL is "
|
|
51
|
+
"a Dutch translation. This version adds a Dutch prompt to the dataset.",
|
|
52
|
+
adapted_from=["NFCorpus-NL"],
|
|
53
|
+
prompt={
|
|
54
|
+
"query": "Gegeven een vraag, haal relevante documenten op die de vraag het beste beantwoorden"
|
|
55
|
+
},
|
|
56
|
+
**_nf_corpus_metadata,
|
|
40
57
|
)
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
2
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class OpenTenderRetrieval(AbsTaskRetrieval):
|
|
6
|
+
metadata = TaskMetadata(
|
|
7
|
+
name="OpenTenderRetrieval",
|
|
8
|
+
description="This dataset contains Belgian and Dutch tender calls from OpenTender in Dutch",
|
|
9
|
+
reference="https://arxiv.org/abs/2509.12340",
|
|
10
|
+
dataset={
|
|
11
|
+
"path": "clips/mteb-nl-opentender-ret",
|
|
12
|
+
"revision": "83eec1aa9c58f1dc8acfac015f653a9c25bda3f4",
|
|
13
|
+
},
|
|
14
|
+
type="Retrieval",
|
|
15
|
+
category="t2t",
|
|
16
|
+
modalities=["text"],
|
|
17
|
+
eval_splits=["test"],
|
|
18
|
+
eval_langs=["nld-Latn"],
|
|
19
|
+
main_score="ndcg_at_10",
|
|
20
|
+
date=("2009-11-01", "2010-01-01"),
|
|
21
|
+
domains=["Government", "Written"],
|
|
22
|
+
task_subtypes=["Article retrieval"],
|
|
23
|
+
license="cc-by-nc-sa-4.0",
|
|
24
|
+
annotations_creators="derived",
|
|
25
|
+
dialect=[],
|
|
26
|
+
sample_creation="found",
|
|
27
|
+
bibtex_citation=r"""
|
|
28
|
+
@misc{banar2025mtebnle5nlembeddingbenchmark,
|
|
29
|
+
archiveprefix = {arXiv},
|
|
30
|
+
author = {Nikolay Banar and Ehsan Lotfi and Jens Van Nooten and Cristina Arhiliuc and Marija Kliocaite and Walter Daelemans},
|
|
31
|
+
eprint = {2509.12340},
|
|
32
|
+
primaryclass = {cs.CL},
|
|
33
|
+
title = {MTEB-NL and E5-NL: Embedding Benchmark and Models for Dutch},
|
|
34
|
+
url = {https://arxiv.org/abs/2509.12340},
|
|
35
|
+
year = {2025},
|
|
36
|
+
}
|
|
37
|
+
""",
|
|
38
|
+
prompt={
|
|
39
|
+
"query": "Gegeven een titel, haal de aanbestedingsbeschrijving op die het beste bij de titel past"
|
|
40
|
+
},
|
|
41
|
+
)
|
|
@@ -1,30 +1,26 @@
|
|
|
1
1
|
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
2
2
|
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
annotations_creators="derived",
|
|
25
|
-
dialect=[],
|
|
26
|
-
sample_creation="machine-translated and verified", # manually checked a small subset
|
|
27
|
-
bibtex_citation=r"""
|
|
4
|
+
_sci_fact_nl_metadata = dict(
|
|
5
|
+
dataset={
|
|
6
|
+
"path": "clips/beir-nl-scifact",
|
|
7
|
+
"revision": "856d8dfc294b138856bbf3042450e3782321e44e",
|
|
8
|
+
},
|
|
9
|
+
reference="https://huggingface.co/datasets/clips/beir-nl-scifact",
|
|
10
|
+
type="Retrieval",
|
|
11
|
+
category="t2t",
|
|
12
|
+
modalities=["text"],
|
|
13
|
+
eval_splits=["test"],
|
|
14
|
+
eval_langs=["nld-Latn"],
|
|
15
|
+
main_score="ndcg_at_10",
|
|
16
|
+
date=("2020-05-01", "2020-05-01"), # best guess: based on submission date
|
|
17
|
+
domains=["Academic", "Medical", "Written"],
|
|
18
|
+
task_subtypes=[],
|
|
19
|
+
license="cc-by-4.0",
|
|
20
|
+
annotations_creators="derived",
|
|
21
|
+
dialect=[],
|
|
22
|
+
sample_creation="machine-translated and verified", # manually checked a small subset
|
|
23
|
+
bibtex_citation=r"""
|
|
28
24
|
@misc{banar2024beirnlzeroshotinformationretrieval,
|
|
29
25
|
archiveprefix = {arXiv},
|
|
30
26
|
author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans},
|
|
@@ -35,5 +31,27 @@ class SciFactNL(AbsTaskRetrieval):
|
|
|
35
31
|
year = {2024},
|
|
36
32
|
}
|
|
37
33
|
""",
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class SciFactNL(AbsTaskRetrieval):
|
|
38
|
+
metadata = TaskMetadata(
|
|
39
|
+
name="SciFact-NL",
|
|
40
|
+
description="SciFactNL verifies scientific claims in Dutch using evidence from the research literature "
|
|
41
|
+
"containing scientific paper abstracts.",
|
|
38
42
|
adapted_from=["SciFact"],
|
|
43
|
+
**_sci_fact_nl_metadata,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class SciFactNLv2(AbsTaskRetrieval):
|
|
48
|
+
metadata = TaskMetadata(
|
|
49
|
+
name="SciFact-NL.v2",
|
|
50
|
+
description="SciFactNL verifies scientific claims in Dutch using evidence from the research literature "
|
|
51
|
+
"containing scientific paper abstracts. This version adds a Dutch prompt to the dataset.",
|
|
52
|
+
adapted_from=["SciFact-NL"],
|
|
53
|
+
prompt={
|
|
54
|
+
"query": "Given a scientific claim, retrieve documents that support or refute the claim"
|
|
55
|
+
},
|
|
56
|
+
**_sci_fact_nl_metadata,
|
|
39
57
|
)
|
|
@@ -1,33 +1,26 @@
|
|
|
1
1
|
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
2
2
|
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
domains=["Academic", "Written", "Non-fiction"],
|
|
25
|
-
task_subtypes=[],
|
|
26
|
-
license="cc-by-sa-4.0",
|
|
27
|
-
annotations_creators="derived",
|
|
28
|
-
dialect=[],
|
|
29
|
-
sample_creation="machine-translated and verified", # manually checked a small subset
|
|
30
|
-
bibtex_citation=r"""
|
|
4
|
+
_scidocsnl_metadata = dict(
|
|
5
|
+
dataset={
|
|
6
|
+
"path": "clips/beir-nl-scidocs",
|
|
7
|
+
"revision": "4e018aa220029f9d1bd5a31de3650e322e32ea38",
|
|
8
|
+
},
|
|
9
|
+
reference="https://huggingface.co/datasets/clips/beir-nl-scidocs",
|
|
10
|
+
type="Retrieval",
|
|
11
|
+
category="t2t",
|
|
12
|
+
modalities=["text"],
|
|
13
|
+
eval_splits=["test"],
|
|
14
|
+
eval_langs=["nld-Latn"],
|
|
15
|
+
main_score="ndcg_at_10",
|
|
16
|
+
date=("2020-05-01", "2020-05-01"), # best guess: based on submission date
|
|
17
|
+
domains=["Academic", "Written", "Non-fiction"],
|
|
18
|
+
task_subtypes=[],
|
|
19
|
+
license="cc-by-sa-4.0",
|
|
20
|
+
annotations_creators="derived",
|
|
21
|
+
dialect=[],
|
|
22
|
+
sample_creation="machine-translated and verified", # manually checked a small subset
|
|
23
|
+
bibtex_citation=r"""
|
|
31
24
|
@misc{banar2024beirnlzeroshotinformationretrieval,
|
|
32
25
|
archiveprefix = {arXiv},
|
|
33
26
|
author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans},
|
|
@@ -38,5 +31,29 @@ class SCIDOCSNL(AbsTaskRetrieval):
|
|
|
38
31
|
year = {2024},
|
|
39
32
|
}
|
|
40
33
|
""",
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class SCIDOCSNL(AbsTaskRetrieval):
|
|
38
|
+
metadata = TaskMetadata(
|
|
39
|
+
name="SCIDOCS-NL",
|
|
40
|
+
description="SciDocs, a new evaluation benchmark consisting of seven document-level tasks ranging from "
|
|
41
|
+
"citation prediction, to document classification and recommendation. SciDocs-NL is a Dutch "
|
|
42
|
+
"translation.",
|
|
41
43
|
adapted_from=["SCIDOCS"],
|
|
44
|
+
**_scidocsnl_metadata,
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class SCIDOCSNLv2(AbsTaskRetrieval):
|
|
49
|
+
metadata = TaskMetadata(
|
|
50
|
+
name="SCIDOCS-NL.v2",
|
|
51
|
+
description="SciDocs, a new evaluation benchmark consisting of seven document-level tasks ranging from "
|
|
52
|
+
"citation prediction, to document classification and recommendation. SciDocs-NL is a Dutch "
|
|
53
|
+
"translation. This version adds a Dutch prompt to the dataset.",
|
|
54
|
+
adapted_from=["SCIDOCS-NL"],
|
|
55
|
+
**_scidocsnl_metadata,
|
|
56
|
+
prompt={
|
|
57
|
+
"query": "Gegeven de titel van een wetenschappelijk artikel, haal de abstracts op van artikelen die door het gegeven artikel worden geciteerd"
|
|
58
|
+
},
|
|
42
59
|
)
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
2
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class VABBRetrieval(AbsTaskRetrieval):
|
|
6
|
+
metadata = TaskMetadata(
|
|
7
|
+
name="VABBRetrieval",
|
|
8
|
+
description="This dataset contains the fourteenth edition of the Flemish Academic Bibliography for the Social "
|
|
9
|
+
"Sciences and Humanities (VABB-SHW), a database of academic publications from the social sciences "
|
|
10
|
+
"and humanities authored by researchers affiliated to Flemish universities (more information). "
|
|
11
|
+
"Publications in the database are used as one of the parameters of the Flemish performance-based "
|
|
12
|
+
"research funding system",
|
|
13
|
+
reference="https://zenodo.org/records/14214806",
|
|
14
|
+
dataset={
|
|
15
|
+
"path": "clips/mteb-nl-vabb-ret",
|
|
16
|
+
"revision": "af4a1e5b3ed451103894f86ff6b3ce85085d7b48",
|
|
17
|
+
},
|
|
18
|
+
type="Retrieval",
|
|
19
|
+
category="t2t",
|
|
20
|
+
modalities=["text"],
|
|
21
|
+
eval_splits=["test"],
|
|
22
|
+
eval_langs=["nld-Latn"],
|
|
23
|
+
main_score="ndcg_at_10",
|
|
24
|
+
date=("2009-11-01", "2010-01-01"),
|
|
25
|
+
domains=["Academic", "Written"],
|
|
26
|
+
task_subtypes=["Article retrieval"],
|
|
27
|
+
license="cc-by-nc-sa-4.0",
|
|
28
|
+
annotations_creators="derived",
|
|
29
|
+
dialect=[],
|
|
30
|
+
sample_creation="found",
|
|
31
|
+
bibtex_citation=r"""
|
|
32
|
+
@dataset{aspeslagh2024vabb,
|
|
33
|
+
author = {Aspeslagh, Pieter and Guns, Raf and Engels, Tim C. E.},
|
|
34
|
+
doi = {10.5281/zenodo.14214806},
|
|
35
|
+
publisher = {Zenodo},
|
|
36
|
+
title = {VABB-SHW: Dataset of Flemish Academic Bibliography for the Social Sciences and Humanities (edition 14)},
|
|
37
|
+
url = {https://doi.org/10.5281/zenodo.14214806},
|
|
38
|
+
year = {2024},
|
|
39
|
+
}
|
|
40
|
+
""",
|
|
41
|
+
prompt={
|
|
42
|
+
"query": "Gegeven een titel, haal de wetenschappelijke abstract op die het beste bij de titel past"
|
|
43
|
+
},
|
|
44
|
+
)
|
|
@@ -59,9 +59,9 @@ Fishel, Mark},
|
|
|
59
59
|
self.data_loaded = True
|
|
60
60
|
|
|
61
61
|
def dataset_transform(self) -> None:
|
|
62
|
-
"""And transform to a retrieval
|
|
62
|
+
"""And transform to a retrieval dataset, which have the following attributes
|
|
63
63
|
|
|
64
|
-
self.corpus = dict[doc_id, dict[str, str]] #id => dict with document
|
|
64
|
+
self.corpus = dict[doc_id, dict[str, str]] #id => dict with document data like title and text
|
|
65
65
|
self.queries = dict[query_id, str] #id => query
|
|
66
66
|
self.relevant_docs = dict[query_id, dict[doc_id, score]]
|
|
67
67
|
"""
|
|
@@ -46,9 +46,9 @@ class SNLRetrieval(AbsTaskRetrieval):
|
|
|
46
46
|
self.data_loaded = True
|
|
47
47
|
|
|
48
48
|
def dataset_transform(self) -> None:
|
|
49
|
-
"""And transform to a retrieval
|
|
49
|
+
"""And transform to a retrieval dataset, which have the following attributes
|
|
50
50
|
|
|
51
|
-
self.corpus = dict[doc_id, dict[str, str]] #id => dict with document
|
|
51
|
+
self.corpus = dict[doc_id, dict[str, str]] #id => dict with document data like title and text
|
|
52
52
|
self.queries = dict[query_id, str] #id => query
|
|
53
53
|
self.relevant_docs = dict[query_id, dict[doc_id, score]]
|
|
54
54
|
"""
|
|
@@ -1,4 +1,13 @@
|
|
|
1
|
-
from .ria_news_retrieval import
|
|
1
|
+
from .ria_news_retrieval import (
|
|
2
|
+
RiaNewsRetrieval,
|
|
3
|
+
RiaNewsRetrievalHardNegatives,
|
|
4
|
+
RiaNewsRetrievalHardNegativesV2,
|
|
5
|
+
)
|
|
2
6
|
from .ru_bq_retrieval import RuBQRetrieval
|
|
3
7
|
|
|
4
|
-
__all__ = [
|
|
8
|
+
__all__ = [
|
|
9
|
+
"RiaNewsRetrieval",
|
|
10
|
+
"RiaNewsRetrievalHardNegatives",
|
|
11
|
+
"RiaNewsRetrievalHardNegativesV2",
|
|
12
|
+
"RuBQRetrieval",
|
|
13
|
+
]
|
|
@@ -1,6 +1,31 @@
|
|
|
1
1
|
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
2
2
|
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
3
|
|
|
4
|
+
_ria_news_metadata = dict(
|
|
5
|
+
reference="https://arxiv.org/abs/1901.07786",
|
|
6
|
+
type="Retrieval",
|
|
7
|
+
category="t2t",
|
|
8
|
+
modalities=["text"],
|
|
9
|
+
eval_splits=["test"],
|
|
10
|
+
eval_langs=["rus-Cyrl"],
|
|
11
|
+
main_score="ndcg_at_10",
|
|
12
|
+
date=("2010-01-01", "2014-12-31"),
|
|
13
|
+
domains=["News", "Written"],
|
|
14
|
+
task_subtypes=["Article retrieval"],
|
|
15
|
+
license="cc-by-nc-nd-4.0",
|
|
16
|
+
annotations_creators="derived",
|
|
17
|
+
dialect=[],
|
|
18
|
+
sample_creation="found",
|
|
19
|
+
bibtex_citation=r"""
|
|
20
|
+
@inproceedings{gavrilov2018self,
|
|
21
|
+
author = {Gavrilov, Daniil and Kalaidin, Pavel and Malykh, Valentin},
|
|
22
|
+
booktitle = {Proceedings of the 41st European Conference on Information Retrieval},
|
|
23
|
+
title = {Self-Attentive Model for Headline Generation},
|
|
24
|
+
year = {2019},
|
|
25
|
+
}
|
|
26
|
+
""",
|
|
27
|
+
)
|
|
28
|
+
|
|
4
29
|
|
|
5
30
|
class RiaNewsRetrieval(AbsTaskRetrieval):
|
|
6
31
|
ignore_identical_ids = True
|
|
@@ -12,29 +37,8 @@ class RiaNewsRetrieval(AbsTaskRetrieval):
|
|
|
12
37
|
"revision": "82374b0bbacda6114f39ff9c5b925fa1512ca5d7",
|
|
13
38
|
},
|
|
14
39
|
description="News article retrieval by headline. Based on Rossiya Segodnya dataset.",
|
|
15
|
-
reference="https://arxiv.org/abs/1901.07786",
|
|
16
|
-
type="Retrieval",
|
|
17
|
-
category="t2t",
|
|
18
|
-
modalities=["text"],
|
|
19
|
-
eval_splits=["test"],
|
|
20
|
-
eval_langs=["rus-Cyrl"],
|
|
21
|
-
main_score="ndcg_at_10",
|
|
22
|
-
date=("2010-01-01", "2014-12-31"),
|
|
23
|
-
domains=["News", "Written"],
|
|
24
|
-
task_subtypes=["Article retrieval"],
|
|
25
|
-
license="cc-by-nc-nd-4.0",
|
|
26
|
-
annotations_creators="derived",
|
|
27
|
-
dialect=[],
|
|
28
|
-
sample_creation="found",
|
|
29
|
-
bibtex_citation=r"""
|
|
30
|
-
@inproceedings{gavrilov2018self,
|
|
31
|
-
author = {Gavrilov, Daniil and Kalaidin, Pavel and Malykh, Valentin},
|
|
32
|
-
booktitle = {Proceedings of the 41st European Conference on Information Retrieval},
|
|
33
|
-
title = {Self-Attentive Model for Headline Generation},
|
|
34
|
-
year = {2019},
|
|
35
|
-
}
|
|
36
|
-
""",
|
|
37
40
|
prompt={"query": "Given a news title, retrieve relevant news article"},
|
|
41
|
+
**_ria_news_metadata,
|
|
38
42
|
)
|
|
39
43
|
|
|
40
44
|
|
|
@@ -48,27 +52,27 @@ class RiaNewsRetrievalHardNegatives(AbsTaskRetrieval):
|
|
|
48
52
|
"revision": "d42860a6c15f0a2c4485bda10c6e5b641fdfe479",
|
|
49
53
|
},
|
|
50
54
|
description="News article retrieval by headline. Based on Rossiya Segodnya dataset. The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct.",
|
|
51
|
-
reference="https://arxiv.org/abs/1901.07786",
|
|
52
|
-
type="Retrieval",
|
|
53
|
-
category="t2t",
|
|
54
|
-
modalities=["text"],
|
|
55
|
-
eval_splits=["test"],
|
|
56
|
-
eval_langs=["rus-Cyrl"],
|
|
57
|
-
main_score="ndcg_at_10",
|
|
58
|
-
date=("2010-01-01", "2014-12-31"),
|
|
59
|
-
domains=["News", "Written"],
|
|
60
|
-
task_subtypes=["Article retrieval"],
|
|
61
|
-
license="cc-by-nc-nd-4.0",
|
|
62
|
-
annotations_creators="derived",
|
|
63
|
-
dialect=[],
|
|
64
|
-
sample_creation="found",
|
|
65
|
-
bibtex_citation=r"""
|
|
66
|
-
@inproceedings{gavrilov2018self,
|
|
67
|
-
author = {Gavrilov, Daniil and Kalaidin, Pavel and Malykh, Valentin},
|
|
68
|
-
booktitle = {Proceedings of the 41st European Conference on Information Retrieval},
|
|
69
|
-
title = {Self-Attentive Model for Headline Generation},
|
|
70
|
-
year = {2019},
|
|
71
|
-
}
|
|
72
|
-
""",
|
|
73
55
|
adapted_from=["RiaNewsRetrieval"],
|
|
56
|
+
superseded_by="RiaNewsRetrievalHardNegatives.v2",
|
|
57
|
+
**_ria_news_metadata,
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class RiaNewsRetrievalHardNegativesV2(AbsTaskRetrieval):
|
|
62
|
+
ignore_identical_ids = True
|
|
63
|
+
|
|
64
|
+
metadata = TaskMetadata(
|
|
65
|
+
name="RiaNewsRetrievalHardNegatives.v2",
|
|
66
|
+
dataset={
|
|
67
|
+
"path": "mteb/RiaNewsRetrieval_test_top_250_only_w_correct-v2",
|
|
68
|
+
"revision": "d42860a6c15f0a2c4485bda10c6e5b641fdfe479",
|
|
69
|
+
},
|
|
70
|
+
description=(
|
|
71
|
+
"News article retrieval by headline. Based on Rossiya Segodnya dataset. "
|
|
72
|
+
"The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct."
|
|
73
|
+
"V2 uses a more appropriate prompt rather than the default prompt for retrieval. You can get more information on the effect of different prompt in the [PR](https://github.com/embeddings-benchmark/mteb/pull/3469#issuecomment-3436467106)"
|
|
74
|
+
),
|
|
75
|
+
adapted_from=["RiaNewsRetrieval"],
|
|
76
|
+
prompt={"query": "Given a news title, retrieve relevant news article"},
|
|
77
|
+
**_ria_news_metadata,
|
|
74
78
|
)
|
|
@@ -7,13 +7,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
|
|
|
7
7
|
class SlovakSumRetrieval(AbsTaskRetrieval):
|
|
8
8
|
metadata = TaskMetadata(
|
|
9
9
|
name="SlovakSumRetrieval",
|
|
10
|
-
description=""
|
|
11
|
-
SlovakSum, a Slovak news summarization dataset consisting of over 200 thousand
|
|
12
|
-
news articles with titles and short abstracts obtained from multiple Slovak newspapers.
|
|
13
|
-
|
|
14
|
-
Originally intended as a summarization task, but since no human annotations were provided
|
|
15
|
-
here reformulated to a retrieval task.
|
|
16
|
-
""",
|
|
10
|
+
description="SlovakSum, a Slovak news summarization dataset consisting of over 200 thousand news articles with titles and short abstracts obtained from multiple Slovak newspapers. Originally intended as a summarization task, but since no human annotations were provided, it is reformulated here as a retrieval task.",
|
|
17
11
|
reference="https://huggingface.co/datasets/NaiveNeuron/slovaksum",
|
|
18
12
|
dataset={
|
|
19
13
|
"path": "NaiveNeuron/slovaksum",
|
|
@@ -42,9 +42,9 @@ class TurHistQuadRetrieval(AbsTaskRetrieval):
|
|
|
42
42
|
)
|
|
43
43
|
|
|
44
44
|
def load_data(self, **kwargs) -> None:
|
|
45
|
-
"""And transform to a retrieval
|
|
45
|
+
"""And transform to a retrieval dataset, which have the following attributes
|
|
46
46
|
|
|
47
|
-
self.corpus = dict[doc_id, dict[str, str]] #id => dict with document
|
|
47
|
+
self.corpus = dict[doc_id, dict[str, str]] #id => dict with document data like title and text
|
|
48
48
|
self.queries = dict[query_id, str] #id => query
|
|
49
49
|
self.relevant_docs = dict[query_id, dict[doc_id, score]]
|
|
50
50
|
"""
|
|
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
|
|
|
5
5
|
class ArguAnaVN(AbsTaskRetrieval):
|
|
6
6
|
metadata = TaskMetadata(
|
|
7
7
|
name="ArguAna-VN",
|
|
8
|
-
description="
|
|
9
|
-
The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
|
|
10
|
-
- The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
|
|
11
|
-
- Applies advanced embedding models to filter the translations.
|
|
12
|
-
- Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
|
|
8
|
+
description="A translated dataset from ArguAna, a dataset for retrieving the best counterargument to a given argument. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Cohere's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Uses LLM-as-a-judge to score the quality of the samples based on multiple criteria.",
|
|
13
9
|
reference="http://argumentation.bplaced.net/arguana/data",
|
|
14
10
|
dataset={
|
|
15
11
|
"path": "GreenNode/arguana-vn",
|
|
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
|
|
|
5
5
|
class ClimateFEVERVN(AbsTaskRetrieval):
|
|
6
6
|
metadata = TaskMetadata(
|
|
7
7
|
name="ClimateFEVER-VN",
|
|
8
|
-
description="
|
|
9
|
-
The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
|
|
10
|
-
- The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
|
|
11
|
-
- Applies advanced embedding models to filter the translations.
|
|
12
|
-
- Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
|
|
8
|
+
description="A translated dataset from CLIMATE-FEVER, a dataset adopting the FEVER methodology that consists of 1,535 real-world claims regarding climate change. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Cohere's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Uses LLM-as-a-judge to score the quality of the samples based on multiple criteria.",
|
|
13
9
|
reference="https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html",
|
|
14
10
|
dataset={
|
|
15
11
|
"path": "GreenNode/climate-fever-vn",
|
|
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
|
|
|
5
5
|
class CQADupstackAndroidVN(AbsTaskRetrieval):
|
|
6
6
|
metadata = TaskMetadata(
|
|
7
7
|
name="CQADupstackAndroid-VN",
|
|
8
|
-
description="
|
|
9
|
-
The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
|
|
10
|
-
- The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
|
|
11
|
-
- Applies advanced embedding models to filter the translations.
|
|
12
|
-
- Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
|
|
8
|
+
description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Cohere's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Uses LLM-as-a-judge to score the quality of the samples based on multiple criteria.",
|
|
13
9
|
reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
|
|
14
10
|
dataset={
|
|
15
11
|
"path": "GreenNode/cqadupstack-android-vn",
|