mteb 2.1.4__py3-none-any.whl → 2.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/__init__.py +4 -0
- mteb/_create_dataloaders.py +6 -3
- mteb/_evaluators/any_sts_evaluator.py +21 -12
- mteb/_evaluators/classification_metrics.py +54 -0
- mteb/_evaluators/clustering_evaluator.py +1 -1
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +9 -4
- mteb/_evaluators/pair_classification_evaluator.py +30 -38
- mteb/_evaluators/sklearn_evaluator.py +15 -28
- mteb/_evaluators/text/bitext_mining_evaluator.py +4 -1
- mteb/_evaluators/text/summarization_evaluator.py +4 -2
- mteb/_evaluators/zeroshot_classification_evaluator.py +2 -2
- mteb/abstasks/_data_filter/__init__.py +0 -0
- mteb/abstasks/_data_filter/filters.py +125 -0
- mteb/abstasks/_data_filter/task_pipelines.py +102 -0
- mteb/abstasks/_statistics_calculation.py +6 -2
- mteb/abstasks/classification.py +0 -2
- mteb/abstasks/clustering.py +1 -1
- mteb/abstasks/clustering_legacy.py +3 -0
- mteb/abstasks/multilabel_classification.py +10 -3
- mteb/abstasks/pair_classification.py +8 -1
- mteb/abstasks/sts.py +7 -0
- mteb/abstasks/task_metadata.py +1 -0
- mteb/benchmarks/_create_table.py +84 -37
- mteb/benchmarks/benchmark.py +74 -15
- mteb/benchmarks/benchmarks/__init__.py +8 -0
- mteb/benchmarks/benchmarks/benchmarks.py +259 -15
- mteb/benchmarks/get_benchmark.py +2 -0
- mteb/cache.py +47 -10
- mteb/deprecated_evaluator.py +8 -13
- mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
- mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
- mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
- mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
- mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
- mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
- mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
- mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
- mteb/evaluate.py +65 -45
- mteb/leaderboard/app.py +268 -133
- mteb/leaderboard/benchmark_selector.py +14 -5
- mteb/leaderboard/figures.py +13 -15
- mteb/leaderboard/table.py +82 -17
- mteb/models/__init__.py +4 -1
- mteb/models/abs_encoder.py +21 -17
- mteb/models/cache_wrappers/__init__.py +2 -1
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +2 -2
- mteb/models/cache_wrappers/cache_wrapper.py +1 -1
- mteb/models/get_model_meta.py +3 -114
- mteb/models/instruct_wrapper.py +5 -1
- mteb/models/model_implementations/align_models.py +7 -0
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +65 -0
- mteb/models/model_implementations/ara_models.py +8 -0
- mteb/models/model_implementations/arctic_models.py +8 -0
- mteb/models/model_implementations/b1ade_models.py +1 -0
- mteb/models/model_implementations/bedrock_models.py +4 -0
- mteb/models/model_implementations/bge_models.py +60 -0
- mteb/models/model_implementations/bica_model.py +35 -0
- mteb/models/model_implementations/blip2_models.py +11 -0
- mteb/models/model_implementations/blip_models.py +27 -0
- mteb/models/model_implementations/bm25.py +1 -0
- mteb/models/model_implementations/bmretriever_models.py +4 -0
- mteb/models/model_implementations/cadet_models.py +9 -0
- mteb/models/model_implementations/cde_models.py +14 -0
- mteb/models/model_implementations/clip_models.py +3 -0
- mteb/models/model_implementations/clips_models.py +100 -0
- mteb/models/model_implementations/codefuse_models.py +162 -0
- mteb/models/model_implementations/codesage_models.py +15 -0
- mteb/models/model_implementations/cohere_models.py +8 -1
- mteb/models/model_implementations/cohere_v.py +5 -0
- mteb/models/model_implementations/colpali_models.py +14 -6
- mteb/models/model_implementations/colqwen_models.py +271 -1
- mteb/models/model_implementations/colsmol_models.py +2 -0
- mteb/models/model_implementations/conan_models.py +1 -0
- mteb/models/model_implementations/dino_models.py +171 -0
- mteb/models/model_implementations/e5_instruct.py +4 -0
- mteb/models/model_implementations/e5_models.py +12 -101
- mteb/models/model_implementations/e5_v.py +1 -0
- mteb/models/model_implementations/eagerworks_models.py +164 -0
- mteb/models/model_implementations/emillykkejensen_models.py +91 -0
- mteb/models/model_implementations/en_code_retriever.py +1 -0
- mteb/models/model_implementations/euler_models.py +32 -0
- mteb/models/model_implementations/evaclip_models.py +4 -0
- mteb/models/model_implementations/fa_models.py +58 -0
- mteb/models/model_implementations/facebookai.py +193 -0
- mteb/models/model_implementations/geogpt_models.py +1 -0
- mteb/models/model_implementations/gme_v_models.py +11 -5
- mteb/models/model_implementations/google_models.py +16 -5
- mteb/models/model_implementations/granite_vision_embedding_models.py +7 -2
- mteb/models/model_implementations/gritlm_models.py +2 -0
- mteb/models/model_implementations/gte_models.py +78 -0
- mteb/models/model_implementations/hinvec_models.py +1 -0
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +6 -0
- mteb/models/model_implementations/inf_models.py +2 -0
- mteb/models/model_implementations/jasper_models.py +255 -2
- mteb/models/model_implementations/jina_clip.py +1 -0
- mteb/models/model_implementations/jina_models.py +209 -5
- mteb/models/model_implementations/kalm_models.py +203 -25
- mteb/models/model_implementations/kblab.py +31 -0
- mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
- mteb/models/model_implementations/kfst.py +25 -0
- mteb/models/model_implementations/kowshik24_models.py +32 -0
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +1 -0
- mteb/models/model_implementations/linq_models.py +3 -2
- mteb/models/model_implementations/listconranker.py +1 -1
- mteb/models/model_implementations/llm2clip_models.py +3 -0
- mteb/models/model_implementations/llm2vec_models.py +8 -0
- mteb/models/model_implementations/mcinext_models.py +3 -0
- mteb/models/model_implementations/mdbr_models.py +2 -0
- mteb/models/model_implementations/misc_models.py +362 -0
- mteb/models/model_implementations/mme5_models.py +1 -0
- mteb/models/model_implementations/moco_models.py +11 -0
- mteb/models/model_implementations/mod_models.py +191 -0
- mteb/models/model_implementations/model2vec_models.py +13 -0
- mteb/models/model_implementations/moka_models.py +3 -0
- mteb/models/model_implementations/mxbai_models.py +9 -0
- mteb/models/model_implementations/nbailab.py +70 -0
- mteb/models/model_implementations/no_instruct_sentence_models.py +1 -0
- mteb/models/model_implementations/nomic_models.py +156 -4
- mteb/models/model_implementations/nomic_models_vision.py +7 -2
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +23 -16
- mteb/models/model_implementations/nvidia_models.py +4 -1
- mteb/models/model_implementations/octen_models.py +195 -0
- mteb/models/model_implementations/openai_models.py +20 -16
- mteb/models/model_implementations/openclip_models.py +24 -0
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -0
- mteb/models/model_implementations/ops_moa_models.py +4 -2
- mteb/models/model_implementations/pawan_models.py +39 -0
- mteb/models/model_implementations/piccolo_models.py +8 -0
- mteb/models/model_implementations/promptriever_models.py +8 -4
- mteb/models/model_implementations/pylate_models.py +37 -4
- mteb/models/model_implementations/qodo_models.py +2 -0
- mteb/models/model_implementations/qtack_models.py +1 -0
- mteb/models/model_implementations/qwen3_models.py +6 -3
- mteb/models/model_implementations/qzhou_models.py +3 -1
- mteb/models/model_implementations/random_baseline.py +16 -21
- mteb/models/model_implementations/rasgaard_models.py +34 -0
- mteb/models/model_implementations/reasonir_model.py +1 -0
- mteb/models/model_implementations/repllama_models.py +2 -0
- mteb/models/model_implementations/rerankers_custom.py +3 -3
- mteb/models/model_implementations/rerankers_monot5_based.py +14 -14
- mteb/models/model_implementations/richinfoai_models.py +1 -0
- mteb/models/model_implementations/ru_sentence_models.py +51 -0
- mteb/models/model_implementations/ruri_models.py +322 -0
- mteb/models/model_implementations/salesforce_models.py +3 -0
- mteb/models/model_implementations/samilpwc_models.py +1 -0
- mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
- mteb/models/model_implementations/searchmap_models.py +1 -0
- mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +658 -0
- mteb/models/model_implementations/seed_models.py +1 -0
- mteb/models/model_implementations/sentence_transformers_models.py +57 -0
- mteb/models/model_implementations/shuu_model.py +32 -31
- mteb/models/model_implementations/siglip_models.py +10 -0
- mteb/models/model_implementations/sonar_models.py +1 -0
- mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
- mteb/models/model_implementations/stella_models.py +6 -0
- mteb/models/model_implementations/tarka_models.py +376 -0
- mteb/models/model_implementations/ua_sentence_models.py +10 -0
- mteb/models/model_implementations/uae_models.py +1 -0
- mteb/models/model_implementations/vdr_models.py +2 -0
- mteb/models/model_implementations/vi_vn_models.py +39 -0
- mteb/models/model_implementations/vista_models.py +2 -0
- mteb/models/model_implementations/vlm2vec_models.py +2 -0
- mteb/models/model_implementations/voyage_models.py +15 -0
- mteb/models/model_implementations/voyage_v.py +8 -2
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +1 -0
- mteb/models/model_implementations/yuan_models.py +34 -0
- mteb/models/model_implementations/yuan_models_en.py +58 -0
- mteb/models/model_meta.py +442 -22
- mteb/models/search_encoder_index/__init__.py +7 -0
- mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
- mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +157 -0
- mteb/models/search_wrappers.py +165 -48
- mteb/models/sentence_transformer_wrapper.py +2 -7
- mteb/results/benchmark_results.py +88 -47
- mteb/results/model_result.py +11 -4
- mteb/results/task_result.py +37 -19
- mteb/similarity_functions.py +49 -0
- mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
- mteb/tasks/classification/ara/ajgt.py +1 -2
- mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
- mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
- mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
- mteb/tasks/classification/dan/dk_hate_classification.py +1 -2
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
- mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
- mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
- mteb/tasks/classification/eng/arxiv_classification.py +1 -2
- mteb/tasks/classification/eng/banking77_classification.py +1 -2
- mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
- mteb/tasks/classification/eng/emotion_classification.py +1 -2
- mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
- mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
- mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
- mteb/tasks/classification/eng/imdb_classification.py +1 -2
- mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
- mteb/tasks/classification/eng/news_classification.py +1 -2
- mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
- mteb/tasks/classification/eng/patent_classification.py +1 -2
- mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
- mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
- mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
- mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
- mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
- mteb/tasks/classification/eng/ucf101_classification.py +1 -5
- mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
- mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
- mteb/tasks/classification/est/estonian_valence.py +1 -2
- mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
- mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
- mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
- mteb/tasks/classification/fra/french_book_reviews.py +1 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
- mteb/tasks/classification/heb/__init__.py +6 -1
- mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
- mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
- mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
- mteb/tasks/classification/jpn/wrime_classification.py +1 -2
- mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
- mteb/tasks/classification/kor/klue_tc.py +1 -2
- mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +1 -2
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
- mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
- mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
- mteb/tasks/classification/multilingual/scala_classification.py +1 -2
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
- mteb/tasks/classification/mya/myanmar_news.py +1 -2
- mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
- mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
- mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
- mteb/tasks/classification/nld/iconclass_classification.py +3 -0
- mteb/tasks/classification/nld/open_tender_classification.py +3 -0
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
- mteb/tasks/classification/nob/no_rec_classification.py +1 -2
- mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
- mteb/tasks/classification/ory/odia_news_classification.py +1 -2
- mteb/tasks/classification/pol/polish_classification.py +3 -6
- mteb/tasks/classification/ron/moroco.py +1 -2
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
- mteb/tasks/classification/rus/georeview_classification.py +1 -2
- mteb/tasks/classification/rus/headline_classification.py +1 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
- mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
- mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
- mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
- mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
- mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
- mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
- mteb/tasks/classification/swe/dalaj_classification.py +1 -2
- mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
- mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
- mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
- mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
- mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
- mteb/tasks/classification/tur/__init__.py +4 -0
- mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
- mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
- mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
- mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
- mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
- mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
- mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
- mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
- mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
- mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
- mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
- mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
- mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
- mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
- mteb/tasks/classification/zho/cmteb_classification.py +5 -10
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
- mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
- mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
- mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
- mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
- mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
- mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
- mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
- mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
- mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
- mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
- mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
- mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
- mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
- mteb/tasks/pair_classification/rus/__init__.py +2 -2
- mteb/tasks/pair_classification/rus/terra.py +51 -25
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
- mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
- mteb/tasks/reranking/jpn/__init__.py +9 -1
- mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
- mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
- mteb/tasks/reranking/multilingual/__init__.py +2 -0
- mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
- mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
- mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
- mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
- mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
- mteb/tasks/retrieval/jpn/__init__.py +8 -0
- mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
- mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
- mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
- mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
- mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
- mteb/tasks/retrieval/kor/__init__.py +2 -1
- mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
- mteb/tasks/retrieval/multilingual/__init__.py +22 -0
- mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
- mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
- mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
- mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +399 -0
- mteb/tasks/retrieval/nld/__init__.py +8 -4
- mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
- mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
- mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
- mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
- mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
- mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +1 -7
- mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
- mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
- mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
- mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
- mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
- mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
- mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
- mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
- mteb/types/_encoder_io.py +7 -2
- {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/METADATA +11 -5
- {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/RECORD +457 -391
- mteb/models/model_implementations/nb_sbert.py +0 -25
- {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/WHEEL +0 -0
- {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/entry_points.txt +0 -0
- {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.1.4.dist-info → mteb-2.5.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,399 @@
|
|
|
1
|
+
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
2
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
|
+
|
|
4
|
+
_LANGS = {
|
|
5
|
+
"french": ["fra-Latn"],
|
|
6
|
+
"spanish": ["spa-Latn"],
|
|
7
|
+
"english": ["eng-Latn"],
|
|
8
|
+
"german": ["deu-Latn"],
|
|
9
|
+
"italian": ["ita-Latn"],
|
|
10
|
+
"portuguese": ["por-Latn"],
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Vidore3FinanceEnRetrieval(AbsTaskRetrieval):
    """ViDoRe v3 Finance (EN): text-query → page-image retrieval.

    Corpus of reports from American banking companies; queries were authored
    in English and machine-translated into the other `_LANGS` languages.
    """

    metadata = TaskMetadata(
        # Identity and source.
        name="Vidore3FinanceEnRetrieval",
        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
        description="Retrieve associated pages according to questions. This task, Finance - EN, is a corpus of reports from american banking companies, intended for long-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
        dataset={
            "path": "vidore/vidore_v3_finance_en_mteb_format",
            "revision": "fa78cb14152b3dde8c5defdc4e3ddf50de69dfeb",
        },
        # Task shape: text queries against an image (page screenshot) corpus.
        type="DocumentUnderstanding",
        category="t2i",
        modalities=["text", "image"],
        eval_splits=["test"],
        eval_langs=_LANGS,
        main_score="ndcg_at_10",
        prompt={"query": "Find a screenshot that is relevant to the user's question."},
        # Provenance and licensing.
        date=("2025-10-01", "2025-11-01"),
        domains=["Financial"],
        task_subtypes=["Image Text Retrieval"],
        license="cc-by-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="created and machine-translated",
        bibtex_citation=r"""
@misc{mace2025vidorev3,
  author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
  day = {5},
  howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
  journal = {Hugging Face Blog},
  month = {November},
  publisher = {Hugging Face},
  title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
  year = {2025},
}
""",
    )
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class Vidore3FinanceFrRetrieval(AbsTaskRetrieval):
|
|
53
|
+
metadata = TaskMetadata(
|
|
54
|
+
name="Vidore3FinanceFrRetrieval",
|
|
55
|
+
description="Retrieve associated pages according to questions. This task, Finance - FR, is a corpus of reports from french companies in the luxury domain, intended for long-document understanding tasks. Original queries were created in french, then translated to english, german, italian, portuguese and spanish.",
|
|
56
|
+
reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
|
|
57
|
+
dataset={
|
|
58
|
+
"path": "vidore/vidore_v3_finance_fr_mteb_format",
|
|
59
|
+
"revision": "8a2adfda85a7967c7252129703d9b3c7c9f038a9",
|
|
60
|
+
},
|
|
61
|
+
type="DocumentUnderstanding",
|
|
62
|
+
category="t2i",
|
|
63
|
+
eval_splits=["test"],
|
|
64
|
+
eval_langs=_LANGS,
|
|
65
|
+
main_score="ndcg_at_10",
|
|
66
|
+
date=("2025-10-01", "2025-11-01"),
|
|
67
|
+
domains=["Financial"],
|
|
68
|
+
task_subtypes=["Image Text Retrieval"],
|
|
69
|
+
license="cc-by-4.0",
|
|
70
|
+
annotations_creators="derived",
|
|
71
|
+
dialect=[],
|
|
72
|
+
sample_creation="created and machine-translated",
|
|
73
|
+
bibtex_citation=r"""
|
|
74
|
+
@misc{mace2025vidorev3,
|
|
75
|
+
author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
|
|
76
|
+
day = {5},
|
|
77
|
+
howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
|
|
78
|
+
journal = {Hugging Face Blog},
|
|
79
|
+
month = {November},
|
|
80
|
+
publisher = {Hugging Face},
|
|
81
|
+
title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
|
|
82
|
+
year = {2025},
|
|
83
|
+
}
|
|
84
|
+
""",
|
|
85
|
+
prompt={"query": "Find a screenshot that is relevant to the user's question."},
|
|
86
|
+
is_public=True,
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class Vidore3IndustrialRetrieval(AbsTaskRetrieval):
|
|
91
|
+
metadata = TaskMetadata(
|
|
92
|
+
name="Vidore3IndustrialRetrieval",
|
|
93
|
+
description="Retrieve associated pages according to questions. This dataset, Industrial reports, is a corpus of technical documents on military aircraft (fueling, mechanics...), intended for complex-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
|
|
94
|
+
reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
|
|
95
|
+
dataset={
|
|
96
|
+
"path": "vidore/vidore_v3_industrial_mteb_format",
|
|
97
|
+
"revision": "f732b725cf4a70803210edfe265a04f8bd5328f6",
|
|
98
|
+
},
|
|
99
|
+
type="DocumentUnderstanding",
|
|
100
|
+
category="t2i",
|
|
101
|
+
eval_splits=["test"],
|
|
102
|
+
eval_langs=_LANGS,
|
|
103
|
+
main_score="ndcg_at_10",
|
|
104
|
+
date=("2025-10-01", "2025-11-01"),
|
|
105
|
+
domains=["Engineering"],
|
|
106
|
+
task_subtypes=["Image Text Retrieval"],
|
|
107
|
+
license="cc-by-4.0",
|
|
108
|
+
annotations_creators="derived",
|
|
109
|
+
dialect=[],
|
|
110
|
+
modalities=["text", "image"],
|
|
111
|
+
sample_creation="created and machine-translated",
|
|
112
|
+
bibtex_citation=r"""
|
|
113
|
+
@misc{mace2025vidorev3,
|
|
114
|
+
author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
|
|
115
|
+
day = {5},
|
|
116
|
+
howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
|
|
117
|
+
journal = {Hugging Face Blog},
|
|
118
|
+
month = {November},
|
|
119
|
+
publisher = {Hugging Face},
|
|
120
|
+
title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
|
|
121
|
+
year = {2025},
|
|
122
|
+
}
|
|
123
|
+
""",
|
|
124
|
+
prompt={"query": "Find a screenshot that is relevant to the user's question."},
|
|
125
|
+
is_public=True,
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class Vidore3PharmaceuticalsRetrieval(AbsTaskRetrieval):
|
|
130
|
+
metadata = TaskMetadata(
|
|
131
|
+
name="Vidore3PharmaceuticalsRetrieval",
|
|
132
|
+
description="Retrieve associated pages according to questions. This dataset, Pharmaceutical, is a corpus of slides from the FDA, intended for long-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
|
|
133
|
+
reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
|
|
134
|
+
dataset={
|
|
135
|
+
"path": "vidore/vidore_v3_pharmaceuticals_mteb_format",
|
|
136
|
+
"revision": "237ed4f43c7fb3c4df07ec4e9dd0a4366be555b0",
|
|
137
|
+
},
|
|
138
|
+
type="DocumentUnderstanding",
|
|
139
|
+
category="t2i",
|
|
140
|
+
eval_splits=["test"],
|
|
141
|
+
eval_langs=_LANGS,
|
|
142
|
+
main_score="ndcg_at_10",
|
|
143
|
+
date=("2025-10-01", "2025-11-01"),
|
|
144
|
+
domains=["Medical"],
|
|
145
|
+
task_subtypes=["Image Text Retrieval"],
|
|
146
|
+
license="cc-by-4.0",
|
|
147
|
+
annotations_creators="derived",
|
|
148
|
+
dialect=[],
|
|
149
|
+
modalities=["text", "image"],
|
|
150
|
+
sample_creation="created and machine-translated",
|
|
151
|
+
bibtex_citation=r"""
|
|
152
|
+
@misc{mace2025vidorev3,
|
|
153
|
+
author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
|
|
154
|
+
day = {5},
|
|
155
|
+
howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
|
|
156
|
+
journal = {Hugging Face Blog},
|
|
157
|
+
month = {November},
|
|
158
|
+
publisher = {Hugging Face},
|
|
159
|
+
title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
|
|
160
|
+
year = {2025},
|
|
161
|
+
}
|
|
162
|
+
""",
|
|
163
|
+
prompt={"query": "Find a screenshot that is relevant to the user's question."},
|
|
164
|
+
is_public=True,
|
|
165
|
+
)
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
class Vidore3ComputerScienceRetrieval(AbsTaskRetrieval):
|
|
169
|
+
metadata = TaskMetadata(
|
|
170
|
+
name="Vidore3ComputerScienceRetrieval",
|
|
171
|
+
description="Retrieve associated pages according to questions. This dataset, Computer Science, is a corpus of textbooks from the openstacks website, intended for long-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
|
|
172
|
+
reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
|
|
173
|
+
dataset={
|
|
174
|
+
"path": "vidore/vidore_v3_computer_science_mteb_format",
|
|
175
|
+
"revision": "fb7fb69f81f7db62790f40494124b8ad22b424ab",
|
|
176
|
+
},
|
|
177
|
+
type="DocumentUnderstanding",
|
|
178
|
+
category="t2i",
|
|
179
|
+
eval_splits=["test"],
|
|
180
|
+
eval_langs=_LANGS,
|
|
181
|
+
main_score="ndcg_at_10",
|
|
182
|
+
date=("2025-10-01", "2025-11-01"),
|
|
183
|
+
domains=["Engineering", "Programming"],
|
|
184
|
+
task_subtypes=["Image Text Retrieval"],
|
|
185
|
+
license="cc-by-4.0",
|
|
186
|
+
annotations_creators="derived",
|
|
187
|
+
dialect=[],
|
|
188
|
+
modalities=["text", "image"],
|
|
189
|
+
sample_creation="created and machine-translated",
|
|
190
|
+
bibtex_citation=r"""
|
|
191
|
+
@misc{mace2025vidorev3,
|
|
192
|
+
author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
|
|
193
|
+
day = {5},
|
|
194
|
+
howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
|
|
195
|
+
journal = {Hugging Face Blog},
|
|
196
|
+
month = {November},
|
|
197
|
+
publisher = {Hugging Face},
|
|
198
|
+
title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
|
|
199
|
+
year = {2025},
|
|
200
|
+
}
|
|
201
|
+
""",
|
|
202
|
+
prompt={"query": "Find a screenshot that is relevant to the user's question."},
|
|
203
|
+
is_public=True,
|
|
204
|
+
)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
class Vidore3HrRetrieval(AbsTaskRetrieval):
|
|
208
|
+
metadata = TaskMetadata(
|
|
209
|
+
name="Vidore3HrRetrieval",
|
|
210
|
+
description="Retrieve associated pages according to questions. This dataset, HR, is a corpus of reports released by the european union, intended for complex-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
|
|
211
|
+
reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
|
|
212
|
+
dataset={
|
|
213
|
+
"path": "vidore/vidore_v3_hr_mteb_format",
|
|
214
|
+
"revision": "bc7d43d64815ed30f664168c8052106484aba7fd",
|
|
215
|
+
},
|
|
216
|
+
type="DocumentUnderstanding",
|
|
217
|
+
category="t2i",
|
|
218
|
+
eval_splits=["test"],
|
|
219
|
+
eval_langs=_LANGS,
|
|
220
|
+
main_score="ndcg_at_10",
|
|
221
|
+
date=("2025-10-01", "2025-11-01"),
|
|
222
|
+
domains=["Social"],
|
|
223
|
+
task_subtypes=["Image Text Retrieval"],
|
|
224
|
+
license="cc-by-4.0",
|
|
225
|
+
annotations_creators="derived",
|
|
226
|
+
dialect=[],
|
|
227
|
+
modalities=["text", "image"],
|
|
228
|
+
sample_creation="created and machine-translated",
|
|
229
|
+
bibtex_citation=r"""
|
|
230
|
+
@misc{mace2025vidorev3,
|
|
231
|
+
author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
|
|
232
|
+
day = {5},
|
|
233
|
+
howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
|
|
234
|
+
journal = {Hugging Face Blog},
|
|
235
|
+
month = {November},
|
|
236
|
+
publisher = {Hugging Face},
|
|
237
|
+
title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
|
|
238
|
+
year = {2025},
|
|
239
|
+
}
|
|
240
|
+
""",
|
|
241
|
+
prompt={"query": "Find a screenshot that is relevant to the user's question."},
|
|
242
|
+
is_public=True,
|
|
243
|
+
)
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
class Vidore3EnergyRetrieval(AbsTaskRetrieval):
|
|
247
|
+
metadata = TaskMetadata(
|
|
248
|
+
name="Vidore3EnergyRetrieval",
|
|
249
|
+
description="Retrieve associated pages according to questions. This dataset, Energy Fr, is a corpus of reports on energy supply in europe, intended for complex-document understanding tasks. Original queries were created in french, then translated to english, german, italian, portuguese and spanish.",
|
|
250
|
+
reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
|
|
251
|
+
dataset={
|
|
252
|
+
"path": "vidore/vidore_v3_energy_mteb_format",
|
|
253
|
+
"revision": "84fca99e5978604bae30f2436eacb6dbaa0532e9",
|
|
254
|
+
},
|
|
255
|
+
type="DocumentUnderstanding",
|
|
256
|
+
category="t2i",
|
|
257
|
+
eval_splits=["test"],
|
|
258
|
+
eval_langs=_LANGS,
|
|
259
|
+
main_score="ndcg_at_10",
|
|
260
|
+
date=("2025-10-01", "2025-11-01"),
|
|
261
|
+
domains=["Engineering", "Chemistry", "Academic"],
|
|
262
|
+
task_subtypes=["Image Text Retrieval"],
|
|
263
|
+
license="cc-by-4.0",
|
|
264
|
+
annotations_creators="derived",
|
|
265
|
+
dialect=[],
|
|
266
|
+
modalities=["text", "image"],
|
|
267
|
+
sample_creation="created and machine-translated",
|
|
268
|
+
bibtex_citation=r"""
|
|
269
|
+
@misc{mace2025vidorev3,
|
|
270
|
+
author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
|
|
271
|
+
day = {5},
|
|
272
|
+
howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
|
|
273
|
+
journal = {Hugging Face Blog},
|
|
274
|
+
month = {November},
|
|
275
|
+
publisher = {Hugging Face},
|
|
276
|
+
title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
|
|
277
|
+
year = {2025},
|
|
278
|
+
}
|
|
279
|
+
""",
|
|
280
|
+
prompt={"query": "Find a screenshot that is relevant to the user's question."},
|
|
281
|
+
is_public=True,
|
|
282
|
+
)
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
class Vidore3PhysicsRetrieval(AbsTaskRetrieval):
|
|
286
|
+
metadata = TaskMetadata(
|
|
287
|
+
name="Vidore3PhysicsRetrieval",
|
|
288
|
+
description="Retrieve associated pages according to questions. This dataset, Physics, is a corpus of course slides on french bachelor level physics lectures, intended for complex visual understanding tasks. Original queries were created in french, then translated to english, german, italian, portuguese and spanish.",
|
|
289
|
+
reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
|
|
290
|
+
dataset={
|
|
291
|
+
"path": "vidore/vidore_v3_physics_mteb_format",
|
|
292
|
+
"revision": "2c18ef90ab3ef93a9d86ecc6521cdae2a29f8300",
|
|
293
|
+
},
|
|
294
|
+
type="DocumentUnderstanding",
|
|
295
|
+
category="t2i",
|
|
296
|
+
eval_splits=["test"],
|
|
297
|
+
eval_langs=_LANGS,
|
|
298
|
+
main_score="ndcg_at_10",
|
|
299
|
+
date=("2025-10-01", "2025-11-01"),
|
|
300
|
+
domains=["Engineering", "Academic"],
|
|
301
|
+
task_subtypes=["Image Text Retrieval"],
|
|
302
|
+
license="cc-by-4.0",
|
|
303
|
+
annotations_creators="derived",
|
|
304
|
+
dialect=[],
|
|
305
|
+
modalities=["text", "image"],
|
|
306
|
+
sample_creation="created and machine-translated",
|
|
307
|
+
bibtex_citation=r"""
|
|
308
|
+
@misc{mace2025vidorev3,
|
|
309
|
+
author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
|
|
310
|
+
day = {5},
|
|
311
|
+
howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
|
|
312
|
+
journal = {Hugging Face Blog},
|
|
313
|
+
month = {November},
|
|
314
|
+
publisher = {Hugging Face},
|
|
315
|
+
title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
|
|
316
|
+
year = {2025},
|
|
317
|
+
}
|
|
318
|
+
""",
|
|
319
|
+
prompt={"query": "Find a screenshot that is relevant to the user's question."},
|
|
320
|
+
is_public=True,
|
|
321
|
+
)
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
class Vidore3NuclearRetrieval(AbsTaskRetrieval):
|
|
325
|
+
metadata = TaskMetadata(
|
|
326
|
+
name="Vidore3NuclearRetrieval",
|
|
327
|
+
description="Retrieve associated pages according to questions.",
|
|
328
|
+
reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
|
|
329
|
+
dataset={
|
|
330
|
+
"path": "mteb-private/Vidore3NuclearRetrieval",
|
|
331
|
+
"revision": "a463fc67fefc01152153101e88a32d5f9515e3e3",
|
|
332
|
+
},
|
|
333
|
+
type="DocumentUnderstanding",
|
|
334
|
+
category="t2i",
|
|
335
|
+
eval_splits=["test"],
|
|
336
|
+
eval_langs=_LANGS,
|
|
337
|
+
main_score="ndcg_at_10",
|
|
338
|
+
date=("2025-10-01", "2025-11-01"),
|
|
339
|
+
domains=["Engineering", "Chemistry"],
|
|
340
|
+
task_subtypes=["Image Text Retrieval"],
|
|
341
|
+
license="cc-by-4.0",
|
|
342
|
+
annotations_creators="derived",
|
|
343
|
+
dialect=[],
|
|
344
|
+
modalities=["text", "image"],
|
|
345
|
+
sample_creation="created and machine-translated",
|
|
346
|
+
bibtex_citation=r"""
|
|
347
|
+
@misc{mace2025vidorev3,
|
|
348
|
+
author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
|
|
349
|
+
day = {5},
|
|
350
|
+
howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
|
|
351
|
+
journal = {Hugging Face Blog},
|
|
352
|
+
month = {November},
|
|
353
|
+
publisher = {Hugging Face},
|
|
354
|
+
title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
|
|
355
|
+
year = {2025},
|
|
356
|
+
}
|
|
357
|
+
""",
|
|
358
|
+
prompt={"query": "Find a screenshot that is relevant to the user's question."},
|
|
359
|
+
is_public=False,
|
|
360
|
+
)
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
class Vidore3TelecomRetrieval(AbsTaskRetrieval):
|
|
364
|
+
metadata = TaskMetadata(
|
|
365
|
+
name="Vidore3TelecomRetrieval",
|
|
366
|
+
description="Retrieve associated pages according to questions.",
|
|
367
|
+
reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
|
|
368
|
+
dataset={
|
|
369
|
+
"path": "mteb-private/Vidore3TelecomRetrieval",
|
|
370
|
+
"revision": "a54635a274ef2835721b7cbe3eb27483b9ec964b",
|
|
371
|
+
},
|
|
372
|
+
type="DocumentUnderstanding",
|
|
373
|
+
category="t2i",
|
|
374
|
+
eval_splits=["test"],
|
|
375
|
+
eval_langs=_LANGS,
|
|
376
|
+
main_score="ndcg_at_10",
|
|
377
|
+
date=("2025-10-01", "2025-11-01"),
|
|
378
|
+
domains=["Engineering", "Programming"],
|
|
379
|
+
task_subtypes=["Image Text Retrieval"],
|
|
380
|
+
license="cc-by-4.0",
|
|
381
|
+
annotations_creators="derived",
|
|
382
|
+
dialect=[],
|
|
383
|
+
modalities=["text", "image"],
|
|
384
|
+
sample_creation="created and machine-translated",
|
|
385
|
+
bibtex_citation=r"""
|
|
386
|
+
@misc{mace2025vidorev3,
|
|
387
|
+
author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
|
|
388
|
+
day = {5},
|
|
389
|
+
howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
|
|
390
|
+
journal = {Hugging Face Blog},
|
|
391
|
+
month = {November},
|
|
392
|
+
publisher = {Hugging Face},
|
|
393
|
+
title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
|
|
394
|
+
year = {2025},
|
|
395
|
+
}
|
|
396
|
+
""",
|
|
397
|
+
prompt={"query": "Find a screenshot that is relevant to the user's question."},
|
|
398
|
+
is_public=False,
|
|
399
|
+
)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from .argu_ana_nl_retrieval import ArguAnaNL
|
|
1
|
+
from .argu_ana_nl_retrieval import ArguAnaNL, ArguAnaNLv2
|
|
2
2
|
from .bbsard_nl_retrieval import BBSARDNLRetrieval
|
|
3
3
|
from .climate_fevernl_retrieval import ClimateFEVERNL
|
|
4
4
|
from .cqa_dupstack_android_nl_retrieval import CQADupstackAndroidNLRetrieval
|
|
@@ -20,12 +20,12 @@ from .fi_qa2018_nl_retrieval import FiQA2018NL
|
|
|
20
20
|
from .hotpot_qanl_retrieval import HotpotQANL
|
|
21
21
|
from .legal_qa_nl_retrieval import LegalQANLRetrieval
|
|
22
22
|
from .mmarconl_retrieval import MMMARCONL
|
|
23
|
-
from .nf_corpus_nl_retrieval import NFCorpusNL
|
|
23
|
+
from .nf_corpus_nl_retrieval import NFCorpusNL, NFCorpusNLv2
|
|
24
24
|
from .nqnl_retrieval import NQNL
|
|
25
25
|
from .open_tender_retrieval import OpenTenderRetrieval
|
|
26
26
|
from .quora_nl_retrieval import QuoraNLRetrieval
|
|
27
|
-
from .sci_fact_nl_retrieval import SciFactNL
|
|
28
|
-
from .scidocsnl_retrieval import SCIDOCSNL
|
|
27
|
+
from .sci_fact_nl_retrieval import SciFactNL, SciFactNLv2
|
|
28
|
+
from .scidocsnl_retrieval import SCIDOCSNL, SCIDOCSNLv2
|
|
29
29
|
from .touche2020_nl_retrieval import Touche2020NL
|
|
30
30
|
from .treccovidnl_retrieval import TRECCOVIDNL
|
|
31
31
|
from .vabb_retrieval import VABBRetrieval
|
|
@@ -37,6 +37,7 @@ __all__ = [
|
|
|
37
37
|
"SCIDOCSNL",
|
|
38
38
|
"TRECCOVIDNL",
|
|
39
39
|
"ArguAnaNL",
|
|
40
|
+
"ArguAnaNLv2",
|
|
40
41
|
"BBSARDNLRetrieval",
|
|
41
42
|
"CQADupstackAndroidNLRetrieval",
|
|
42
43
|
"CQADupstackEnglishNLRetrieval",
|
|
@@ -57,9 +58,12 @@ __all__ = [
|
|
|
57
58
|
"HotpotQANL",
|
|
58
59
|
"LegalQANLRetrieval",
|
|
59
60
|
"NFCorpusNL",
|
|
61
|
+
"NFCorpusNLv2",
|
|
60
62
|
"OpenTenderRetrieval",
|
|
61
63
|
"QuoraNLRetrieval",
|
|
64
|
+
"SCIDOCSNLv2",
|
|
62
65
|
"SciFactNL",
|
|
66
|
+
"SciFactNLv2",
|
|
63
67
|
"Touche2020NL",
|
|
64
68
|
"VABBRetrieval",
|
|
65
69
|
]
|
|
@@ -1,33 +1,26 @@
|
|
|
1
1
|
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
2
2
|
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
domains=["Written", "Non-fiction"],
|
|
25
|
-
task_subtypes=[],
|
|
26
|
-
license="cc-by-sa-4.0",
|
|
27
|
-
annotations_creators="derived",
|
|
28
|
-
dialect=[],
|
|
29
|
-
sample_creation="machine-translated and verified", # manually checked a small subset
|
|
30
|
-
bibtex_citation=r"""
|
|
4
|
+
_argu_ana_nl_metadata = dict(
|
|
5
|
+
reference="https://huggingface.co/datasets/clips/beir-nl-arguana",
|
|
6
|
+
dataset={
|
|
7
|
+
"path": "clips/beir-nl-arguana",
|
|
8
|
+
"revision": "4cd085d148fe2cac923bb7758d6ef585926170ba",
|
|
9
|
+
},
|
|
10
|
+
type="Retrieval",
|
|
11
|
+
category="t2t",
|
|
12
|
+
modalities=["text"],
|
|
13
|
+
eval_splits=["test"],
|
|
14
|
+
eval_langs=["nld-Latn"],
|
|
15
|
+
main_score="ndcg_at_10",
|
|
16
|
+
date=("2016-03-01", "2016-03-01"), # best guess: based on publication date
|
|
17
|
+
domains=["Written", "Non-fiction"],
|
|
18
|
+
task_subtypes=[],
|
|
19
|
+
license="cc-by-sa-4.0",
|
|
20
|
+
annotations_creators="derived",
|
|
21
|
+
dialect=[],
|
|
22
|
+
sample_creation="machine-translated and verified", # manually checked a small subset
|
|
23
|
+
bibtex_citation=r"""
|
|
31
24
|
@misc{banar2024beirnlzeroshotinformationretrieval,
|
|
32
25
|
archiveprefix = {arXiv},
|
|
33
26
|
author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans},
|
|
@@ -38,5 +31,31 @@ class ArguAnaNL(AbsTaskRetrieval):
|
|
|
38
31
|
year = {2024},
|
|
39
32
|
}
|
|
40
33
|
""",
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class ArguAnaNL(AbsTaskRetrieval):
|
|
38
|
+
ignore_identical_ids = True
|
|
39
|
+
|
|
40
|
+
metadata = TaskMetadata(
|
|
41
|
+
name="ArguAna-NL",
|
|
42
|
+
description="ArguAna involves the task of retrieval of the best counterargument to an argument. ArguAna-NL is "
|
|
43
|
+
"a Dutch translation.",
|
|
41
44
|
adapted_from=["ArguAna"],
|
|
45
|
+
**_argu_ana_nl_metadata,
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class ArguAnaNLv2(AbsTaskRetrieval):
|
|
50
|
+
ignore_identical_ids = True
|
|
51
|
+
|
|
52
|
+
metadata = TaskMetadata(
|
|
53
|
+
name="ArguAna-NL.v2",
|
|
54
|
+
description="ArguAna involves the task of retrieval of the best counterargument to an argument. ArguAna-NL is "
|
|
55
|
+
"a Dutch translation. This version adds a Dutch prompt to the dataset.",
|
|
56
|
+
prompt={
|
|
57
|
+
"query": "Gegeven een bewering, vind documenten die de bewering weerleggen"
|
|
58
|
+
},
|
|
59
|
+
adapted_from=["ArguAna-NL"],
|
|
60
|
+
**_argu_ana_nl_metadata,
|
|
42
61
|
)
|
|
@@ -1,31 +1,26 @@
|
|
|
1
1
|
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
2
2
|
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
license="cc-by-4.0",
|
|
25
|
-
annotations_creators="derived",
|
|
26
|
-
dialect=[],
|
|
27
|
-
sample_creation="machine-translated and verified", # manually checked a small subset
|
|
28
|
-
bibtex_citation=r"""
|
|
4
|
+
_nf_corpus_metadata = dict(
|
|
5
|
+
dataset={
|
|
6
|
+
"path": "clips/beir-nl-nfcorpus",
|
|
7
|
+
"revision": "942953e674fd0f619ff89897abb806dc3df5dd39",
|
|
8
|
+
},
|
|
9
|
+
reference="https://huggingface.co/datasets/clips/beir-nl-nfcorpus",
|
|
10
|
+
type="Retrieval",
|
|
11
|
+
category="t2t",
|
|
12
|
+
modalities=["text"],
|
|
13
|
+
eval_splits=["test"],
|
|
14
|
+
eval_langs=["nld-Latn"],
|
|
15
|
+
main_score="ndcg_at_10",
|
|
16
|
+
date=("2016-03-01", "2016-03-01"), # best guess: based on publication date
|
|
17
|
+
domains=["Medical", "Academic", "Written"],
|
|
18
|
+
task_subtypes=[],
|
|
19
|
+
license="cc-by-4.0",
|
|
20
|
+
annotations_creators="derived",
|
|
21
|
+
dialect=[],
|
|
22
|
+
sample_creation="machine-translated and verified", # manually checked a small subset
|
|
23
|
+
bibtex_citation=r"""
|
|
29
24
|
@misc{banar2024beirnlzeroshotinformationretrieval,
|
|
30
25
|
archiveprefix = {arXiv},
|
|
31
26
|
author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans},
|
|
@@ -36,5 +31,27 @@ class NFCorpusNL(AbsTaskRetrieval):
|
|
|
36
31
|
year = {2024},
|
|
37
32
|
}
|
|
38
33
|
""",
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class NFCorpusNL(AbsTaskRetrieval):
|
|
38
|
+
metadata = TaskMetadata(
|
|
39
|
+
name="NFCorpus-NL",
|
|
40
|
+
description="NFCorpus: A Full-Text Learning to Rank Dataset for Medical Information Retrieval. NFCorpus-NL is "
|
|
41
|
+
"a Dutch translation.",
|
|
39
42
|
adapted_from=["NFCorpus"],
|
|
43
|
+
**_nf_corpus_metadata,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class NFCorpusNLv2(AbsTaskRetrieval):
|
|
48
|
+
metadata = TaskMetadata(
|
|
49
|
+
name="NFCorpus-NL.v2",
|
|
50
|
+
description="NFCorpus: A Full-Text Learning to Rank Dataset for Medical Information Retrieval. NFCorpus-NL is "
|
|
51
|
+
"a Dutch translation. This version adds a Dutch prompt to the dataset.",
|
|
52
|
+
adapted_from=["NFCorpus-NL"],
|
|
53
|
+
prompt={
|
|
54
|
+
"query": "Gegeven een vraag, haal relevante documenten op die de vraag het beste beantwoorden"
|
|
55
|
+
},
|
|
56
|
+
**_nf_corpus_metadata,
|
|
40
57
|
)
|