mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/__init__.py +2 -0
- mteb/_create_dataloaders.py +78 -30
- mteb/_evaluators/any_sts_evaluator.py +13 -6
- mteb/_evaluators/clustering_evaluator.py +13 -5
- mteb/_evaluators/evaluator.py +12 -4
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
- mteb/_evaluators/pair_classification_evaluator.py +17 -7
- mteb/_evaluators/retrieval_evaluator.py +23 -14
- mteb/_evaluators/retrieval_metrics.py +26 -19
- mteb/_evaluators/sklearn_evaluator.py +27 -17
- mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
- mteb/_evaluators/text/summarization_evaluator.py +31 -20
- mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
- mteb/_helpful_enum.py +5 -1
- mteb/abstasks/_data_filter/filters.py +9 -3
- mteb/abstasks/_data_filter/task_pipelines.py +10 -2
- mteb/abstasks/_statistics_calculation.py +21 -11
- mteb/abstasks/_stratification.py +18 -18
- mteb/abstasks/abstask.py +78 -44
- mteb/abstasks/aggregate_task_metadata.py +21 -18
- mteb/abstasks/aggregated_task.py +23 -35
- mteb/abstasks/classification.py +39 -18
- mteb/abstasks/clustering.py +37 -20
- mteb/abstasks/clustering_legacy.py +30 -16
- mteb/abstasks/image/image_text_pair_classification.py +26 -9
- mteb/abstasks/multilabel_classification.py +33 -21
- mteb/abstasks/pair_classification.py +44 -19
- mteb/abstasks/regression.py +18 -10
- mteb/abstasks/retrieval.py +82 -52
- mteb/abstasks/retrieval_dataset_loaders.py +50 -39
- mteb/abstasks/sts.py +34 -15
- mteb/abstasks/task_metadata.py +44 -37
- mteb/abstasks/text/bitext_mining.py +57 -35
- mteb/abstasks/text/reranking.py +10 -8
- mteb/abstasks/text/summarization.py +26 -10
- mteb/abstasks/zeroshot_classification.py +27 -9
- mteb/benchmarks/_create_table.py +13 -7
- mteb/benchmarks/benchmark.py +15 -3
- mteb/benchmarks/benchmarks/__init__.py +6 -0
- mteb/benchmarks/benchmarks/benchmarks.py +153 -13
- mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
- mteb/benchmarks/get_benchmark.py +14 -55
- mteb/cache.py +189 -31
- mteb/cli/_display_tasks.py +10 -4
- mteb/cli/build_cli.py +112 -13
- mteb/cli/generate_model_card.py +50 -23
- mteb/deprecated_evaluator.py +72 -54
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
- mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
- mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
- mteb/evaluate.py +71 -47
- mteb/filter_tasks.py +36 -32
- mteb/get_tasks.py +37 -33
- mteb/languages/language_scripts.py +11 -4
- mteb/leaderboard/app.py +172 -37
- mteb/leaderboard/table.py +7 -2
- mteb/load_results.py +20 -14
- mteb/models/abs_encoder.py +30 -16
- mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
- mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
- mteb/models/cache_wrappers/cache_wrapper.py +16 -11
- mteb/models/get_model_meta.py +53 -9
- mteb/models/instruct_wrapper.py +41 -13
- mteb/models/model_implementations/align_models.py +11 -5
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +6 -4
- mteb/models/model_implementations/ara_models.py +2 -1
- mteb/models/model_implementations/arctic_models.py +16 -8
- mteb/models/model_implementations/b1ade_models.py +2 -1
- mteb/models/model_implementations/bedrock_models.py +20 -6
- mteb/models/model_implementations/bge_models.py +85 -22
- mteb/models/model_implementations/bica_model.py +4 -3
- mteb/models/model_implementations/blip2_models.py +13 -6
- mteb/models/model_implementations/blip_models.py +33 -20
- mteb/models/model_implementations/bm25.py +27 -17
- mteb/models/model_implementations/bmretriever_models.py +16 -6
- mteb/models/model_implementations/cadet_models.py +2 -1
- mteb/models/model_implementations/cde_models.py +22 -9
- mteb/models/model_implementations/clip_models.py +18 -10
- mteb/models/model_implementations/clips_models.py +6 -3
- mteb/models/model_implementations/codefuse_models.py +10 -5
- mteb/models/model_implementations/codesage_models.py +6 -3
- mteb/models/model_implementations/cohere_models.py +19 -9
- mteb/models/model_implementations/cohere_v.py +16 -6
- mteb/models/model_implementations/colpali_models.py +10 -6
- mteb/models/model_implementations/colqwen_models.py +24 -38
- mteb/models/model_implementations/colsmol_models.py +5 -3
- mteb/models/model_implementations/conan_models.py +12 -5
- mteb/models/model_implementations/dino_models.py +70 -46
- mteb/models/model_implementations/e5_instruct.py +27 -4
- mteb/models/model_implementations/e5_models.py +18 -9
- mteb/models/model_implementations/e5_v.py +16 -10
- mteb/models/model_implementations/eagerworks_models.py +12 -5
- mteb/models/model_implementations/emillykkejensen_models.py +9 -6
- mteb/models/model_implementations/en_code_retriever.py +2 -1
- mteb/models/model_implementations/euler_models.py +3 -2
- mteb/models/model_implementations/evaclip_models.py +13 -4
- mteb/models/model_implementations/fa_models.py +18 -9
- mteb/models/model_implementations/facebookai.py +16 -2
- mteb/models/model_implementations/geogpt_models.py +2 -1
- mteb/models/model_implementations/gme_v_models.py +13 -8
- mteb/models/model_implementations/google_models.py +16 -5
- mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
- mteb/models/model_implementations/gritlm_models.py +5 -2
- mteb/models/model_implementations/gte_models.py +34 -13
- mteb/models/model_implementations/hinvec_models.py +7 -2
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +36 -6
- mteb/models/model_implementations/inf_models.py +4 -2
- mteb/models/model_implementations/jasper_models.py +16 -7
- mteb/models/model_implementations/jina_clip.py +58 -14
- mteb/models/model_implementations/jina_models.py +35 -16
- mteb/models/model_implementations/kalm_models.py +24 -12
- mteb/models/model_implementations/kblab.py +13 -6
- mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
- mteb/models/model_implementations/kfst.py +2 -1
- mteb/models/model_implementations/kowshik24_models.py +2 -1
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +2 -1
- mteb/models/model_implementations/linq_models.py +8 -2
- mteb/models/model_implementations/listconranker.py +11 -5
- mteb/models/model_implementations/llm2clip_models.py +18 -10
- mteb/models/model_implementations/llm2vec_models.py +28 -14
- mteb/models/model_implementations/mcinext_models.py +12 -3
- mteb/models/model_implementations/mdbr_models.py +19 -3
- mteb/models/model_implementations/misc_models.py +131 -68
- mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
- mteb/models/model_implementations/mme5_models.py +3 -2
- mteb/models/model_implementations/moco_models.py +15 -8
- mteb/models/model_implementations/mod_models.py +3 -2
- mteb/models/model_implementations/model2vec_models.py +37 -18
- mteb/models/model_implementations/moka_models.py +4 -1
- mteb/models/model_implementations/nbailab.py +6 -3
- mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
- mteb/models/model_implementations/nomic_models.py +47 -19
- mteb/models/model_implementations/nomic_models_vision.py +6 -4
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
- mteb/models/model_implementations/nvidia_models.py +165 -22
- mteb/models/model_implementations/octen_models.py +64 -3
- mteb/models/model_implementations/openai_models.py +14 -4
- mteb/models/model_implementations/openclip_models.py +30 -17
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
- mteb/models/model_implementations/ops_moa_models.py +10 -3
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
- mteb/models/model_implementations/pawan_models.py +2 -1
- mteb/models/model_implementations/piccolo_models.py +3 -1
- mteb/models/model_implementations/pixie_models.py +56 -0
- mteb/models/model_implementations/promptriever_models.py +20 -10
- mteb/models/model_implementations/pylate_models.py +41 -21
- mteb/models/model_implementations/qodo_models.py +4 -2
- mteb/models/model_implementations/qtack_models.py +2 -1
- mteb/models/model_implementations/qwen3_models.py +14 -4
- mteb/models/model_implementations/qzhou_models.py +4 -2
- mteb/models/model_implementations/random_baseline.py +7 -6
- mteb/models/model_implementations/rasgaard_models.py +3 -2
- mteb/models/model_implementations/reasonir_model.py +66 -1
- mteb/models/model_implementations/repllama_models.py +18 -9
- mteb/models/model_implementations/rerankers_custom.py +25 -10
- mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
- mteb/models/model_implementations/richinfoai_models.py +2 -1
- mteb/models/model_implementations/ru_sentence_models.py +40 -20
- mteb/models/model_implementations/ruri_models.py +20 -10
- mteb/models/model_implementations/salesforce_models.py +13 -4
- mteb/models/model_implementations/samilpwc_models.py +2 -1
- mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
- mteb/models/model_implementations/searchmap_models.py +2 -1
- mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
- mteb/models/model_implementations/seed_models.py +2 -1
- mteb/models/model_implementations/sentence_transformers_models.py +142 -22
- mteb/models/model_implementations/shuu_model.py +2 -1
- mteb/models/model_implementations/siglip_models.py +39 -24
- mteb/models/model_implementations/slm_models.py +419 -0
- mteb/models/model_implementations/sonar_models.py +2 -1
- mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
- mteb/models/model_implementations/stella_models.py +23 -4
- mteb/models/model_implementations/tarka_models.py +4 -2
- mteb/models/model_implementations/text2vec_models.py +12 -3
- mteb/models/model_implementations/ua_sentence_models.py +2 -1
- mteb/models/model_implementations/uae_models.py +17 -5
- mteb/models/model_implementations/vdr_models.py +9 -2
- mteb/models/model_implementations/vi_vn_models.py +12 -6
- mteb/models/model_implementations/vista_models.py +11 -4
- mteb/models/model_implementations/vlm2vec_models.py +14 -7
- mteb/models/model_implementations/voyage_models.py +136 -4
- mteb/models/model_implementations/voyage_v.py +17 -10
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +2 -1
- mteb/models/model_implementations/yuan_models.py +2 -1
- mteb/models/model_implementations/yuan_models_en.py +3 -2
- mteb/models/model_meta.py +127 -40
- mteb/models/models_protocols.py +43 -22
- mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
- mteb/models/search_wrappers.py +63 -29
- mteb/models/sentence_transformer_wrapper.py +52 -26
- mteb/models/vllm_wrapper.py +329 -0
- mteb/py.typed +0 -0
- mteb/results/benchmark_results.py +48 -35
- mteb/results/model_result.py +68 -32
- mteb/results/task_result.py +110 -72
- mteb/similarity_functions.py +19 -9
- mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
- mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
- mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
- mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
- mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
- mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
- mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
- mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
- mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
- mteb/tasks/classification/est/estonian_valence.py +2 -2
- mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
- mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
- mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
- mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
- mteb/tasks/classification/fra/french_book_reviews.py +2 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
- mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
- mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
- mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
- mteb/tasks/classification/jpn/wrime_classification.py +1 -1
- mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
- mteb/tasks/classification/kor/klue_tc.py +2 -2
- mteb/tasks/classification/kor/kor_fin.py +1 -1
- mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
- mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
- mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
- mteb/tasks/classification/multilingual/scala_classification.py +2 -2
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
- mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
- mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
- mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
- mteb/tasks/classification/ory/odia_news_classification.py +2 -2
- mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
- mteb/tasks/classification/ron/moroco.py +1 -1
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
- mteb/tasks/classification/rus/georeview_classification.py +1 -1
- mteb/tasks/classification/rus/headline_classification.py +2 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
- mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
- mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
- mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
- mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
- mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
- mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
- mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
- mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
- mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
- mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
- mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nob/snl_clustering.py +8 -3
- mteb/tasks/clustering/nob/vg_clustering.py +8 -3
- mteb/tasks/clustering/pol/polish_clustering.py +3 -3
- mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
- mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
- mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
- mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
- mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
- mteb/tasks/multichoice/eng/cv_bench.py +4 -4
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
- mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
- mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
- mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
- mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
- mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
- mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
- mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
- mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
- mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
- mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
- mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
- mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
- mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
- mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
- mteb/tasks/pair_classification/rus/terra.py +2 -2
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
- mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
- mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
- mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_rag.py +16 -16
- mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
- mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
- mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
- mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
- mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
- mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
- mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
- mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
- mteb/tasks/retrieval/eng/__init__.py +44 -0
- mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
- mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
- mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
- mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/ml_questions.py +1 -1
- mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
- mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
- mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
- mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
- mteb/tasks/retrieval/kor/__init__.py +15 -1
- mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
- mteb/tasks/retrieval/multilingual/__init__.py +2 -0
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
- mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
- mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
- mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nob/norquad.py +3 -3
- mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
- mteb/tasks/retrieval/vie/__init__.py +14 -6
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
- mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
- mteb/tasks/sts/fao/faroese_sts.py +1 -1
- mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
- mteb/tasks/sts/kor/klue_sts.py +1 -1
- mteb/tasks/sts/por/sick_br_sts.py +1 -1
- mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
- mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
- mteb/types/__init__.py +2 -0
- mteb/types/_encoder_io.py +13 -1
- mteb/types/_result.py +2 -1
- mteb/types/statistics.py +18 -5
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
- mteb/models/model_implementations/mxbai_models.py +0 -111
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
2
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class KoVidore2CybersecurityRetrieval(AbsTaskRetrieval):
    """KoViDoRe v2 page-retrieval task over Korean cybersecurity reports.

    The class is purely declarative: it only supplies ``metadata`` and does
    not override any loading or evaluation behaviour of ``AbsTaskRetrieval``.
    """

    metadata = TaskMetadata(
        # --- Identity and data source -------------------------------------
        name="KoVidore2CybersecurityRetrieval",
        description="Retrieve associated pages according to questions. This dataset, Cybersecurity, is a corpus of technical reports on cyber threat trends and security incident responses in Korea, intended for complex-document understanding tasks.",
        reference="https://github.com/whybe-choi/kovidore-data-generator",
        dataset={
            "path": "whybe-choi/kovidore-v2-cybersecurity-mteb",
            # Fixed revision so every run evaluates the same dataset snapshot.
            "revision": "577d7c45f79d8eb4e7584db3990f91daa7e47956",
        },
        # --- Task shape: text queries, image documents --------------------
        type="DocumentUnderstanding",
        category="t2i",
        modalities=["text", "image"],
        task_subtypes=["Image Text Retrieval"],
        # --- Evaluation setup ---------------------------------------------
        eval_splits=["test"],
        eval_langs=["kor-Hang"],
        main_score="ndcg_at_10",
        prompt={"query": "Find a screenshot that is relevant to the user's question."},
        # --- Provenance ---------------------------------------------------
        date=("2025-12-21", "2026-01-06"),
        domains=["Social"],
        license="cc-by-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="created",
        # --- Citation -----------------------------------------------------
        bibtex_citation="""
@misc{choi2026kovidorev2,
  author = {Yongbin Choi},
  note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
  title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
  url = {https://github.com/whybe-choi/kovidore-data-generator},
  year = {2026},
}
""",
    )
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class KoVidore2EconomicRetrieval(AbsTaskRetrieval):
    """KoViDoRe v2 page-retrieval task over Korean economic-trend reports.

    The class is purely declarative: it only supplies ``metadata`` and does
    not override any loading or evaluation behaviour of ``AbsTaskRetrieval``.
    """

    metadata = TaskMetadata(
        # --- Identity and data source -------------------------------------
        name="KoVidore2EconomicRetrieval",
        description="Retrieve associated pages according to questions. This dataset, Economic trends, is a corpus of periodic reports on major economic indicators in Korea, intended for complex-document understanding tasks.",
        reference="https://github.com/whybe-choi/kovidore-data-generator",
        dataset={
            "path": "whybe-choi/kovidore-v2-economic-mteb",
            # Fixed revision so every run evaluates the same dataset snapshot.
            "revision": "0189c26211290a902cd9d41a0db932808a54c0a8",
        },
        # --- Task shape: text queries, image documents --------------------
        type="DocumentUnderstanding",
        category="t2i",
        modalities=["text", "image"],
        task_subtypes=["Image Text Retrieval"],
        # --- Evaluation setup ---------------------------------------------
        eval_splits=["test"],
        eval_langs=["kor-Hang"],
        main_score="ndcg_at_10",
        prompt={"query": "Find a screenshot that is relevant to the user's question."},
        # --- Provenance ---------------------------------------------------
        date=("2025-12-21", "2026-01-06"),
        domains=["Social"],
        license="cc-by-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="created",
        # --- Citation -----------------------------------------------------
        bibtex_citation="""
@misc{choi2026kovidorev2,
  author = {Yongbin Choi},
  note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
  title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
  url = {https://github.com/whybe-choi/kovidore-data-generator},
  year = {2026},
}
""",
    )
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class KoVidore2EnergyRetrieval(AbsTaskRetrieval):
    """KoViDoRe v2 page-retrieval task over energy-sector reports.

    The class is purely declarative: it only supplies ``metadata`` and does
    not override any loading or evaluation behaviour of ``AbsTaskRetrieval``.
    """

    metadata = TaskMetadata(
        # --- Identity and data source -------------------------------------
        name="KoVidore2EnergyRetrieval",
        description="Retrieve associated pages according to questions. This dataset, Energy, is a corpus of reports on energy market trends, policy planning, and industry statistics, intended for complex-document understanding tasks.",
        reference="https://github.com/whybe-choi/kovidore-data-generator",
        dataset={
            "path": "whybe-choi/kovidore-v2-energy-mteb",
            # Fixed revision so every run evaluates the same dataset snapshot.
            "revision": "8c09a3d22b1fa3a7f5e815e9521da9b048754211",
        },
        # --- Task shape: text queries, image documents --------------------
        type="DocumentUnderstanding",
        category="t2i",
        modalities=["text", "image"],
        task_subtypes=["Image Text Retrieval"],
        # --- Evaluation setup ---------------------------------------------
        eval_splits=["test"],
        eval_langs=["kor-Hang"],
        main_score="ndcg_at_10",
        prompt={"query": "Find a screenshot that is relevant to the user's question."},
        # --- Provenance ---------------------------------------------------
        date=("2025-12-21", "2026-01-06"),
        domains=["Social"],
        license="cc-by-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="created",
        # --- Citation -----------------------------------------------------
        bibtex_citation="""
@misc{choi2026kovidorev2,
  author = {Yongbin Choi},
  note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
  title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
  url = {https://github.com/whybe-choi/kovidore-data-generator},
  year = {2026},
}
""",
    )
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class KoVidore2HrRetrieval(AbsTaskRetrieval):
    """KoViDoRe v2 page-retrieval task over Korean HR / workforce reports.

    The class is purely declarative: it only supplies ``metadata`` and does
    not override any loading or evaluation behaviour of ``AbsTaskRetrieval``.
    """

    metadata = TaskMetadata(
        # --- Identity and data source -------------------------------------
        name="KoVidore2HrRetrieval",
        # "Korea" is capitalised here to match the sibling KoViDoRe v2
        # descriptions (Cybersecurity, Economic), which spell it that way.
        description="Retrieve associated pages according to questions. This dataset, HR, is a corpus of reports on workforce outlook and employment policy in Korea, intended for complex-document understanding tasks.",
        reference="https://github.com/whybe-choi/kovidore-data-generator",
        dataset={
            "path": "whybe-choi/kovidore-v2-hr-mteb",
            # Fixed revision so every run evaluates the same dataset snapshot.
            "revision": "d9432c782a9a3e2eed064f6fac08b4c967d92b99",
        },
        # --- Task shape: text queries, image documents --------------------
        type="DocumentUnderstanding",
        category="t2i",
        modalities=["text", "image"],
        task_subtypes=["Image Text Retrieval"],
        # --- Evaluation setup ---------------------------------------------
        eval_splits=["test"],
        eval_langs=["kor-Hang"],
        main_score="ndcg_at_10",
        prompt={"query": "Find a screenshot that is relevant to the user's question."},
        # --- Provenance ---------------------------------------------------
        date=("2025-12-21", "2026-01-06"),
        domains=["Social"],
        license="cc-by-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="created",
        # --- Citation -----------------------------------------------------
        bibtex_citation="""
@misc{choi2026kovidorev2,
  author = {Yongbin Choi},
  note = {A benchmark for evaluating Korean vision document retrieval with multi-page reasoning queries in practical domains},
  title = {KoViDoRe v2: a comprehensive evaluation of vision document retrieval for enterprise use-cases},
  url = {https://github.com/whybe-choi/kovidore-data-generator},
  year = {2026},
}
""",
    )
|
|
@@ -6,6 +6,7 @@ from .cross_lingual_semantic_discrimination_wmt21 import (
|
|
|
6
6
|
CrossLingualSemanticDiscriminationWMT21,
|
|
7
7
|
)
|
|
8
8
|
from .cur_ev1_retrieval import CUREv1Retrieval
|
|
9
|
+
from .euro_pirq_retrieval import EuroPIRQRetrieval
|
|
9
10
|
from .indic_qa_retrieval import IndicQARetrieval
|
|
10
11
|
from .jina_vdr_bench_retrieval import (
|
|
11
12
|
JinaVDRAirbnbSyntheticRetrieval,
|
|
@@ -107,6 +108,7 @@ __all__ = [
|
|
|
107
108
|
"CUREv1Retrieval",
|
|
108
109
|
"CrossLingualSemanticDiscriminationWMT19",
|
|
109
110
|
"CrossLingualSemanticDiscriminationWMT21",
|
|
111
|
+
"EuroPIRQRetrieval",
|
|
110
112
|
"IndicQARetrieval",
|
|
111
113
|
"JinaVDRAirbnbSyntheticRetrieval",
|
|
112
114
|
"JinaVDRArabicChartQARetrieval",
|
|
@@ -53,7 +53,7 @@ class CrossLingualSemanticDiscriminationWMT19(AbsTaskRetrieval):
|
|
|
53
53
|
)
|
|
54
54
|
num_of_distractors = 4
|
|
55
55
|
|
|
56
|
-
def load_data(self) -> None:
|
|
56
|
+
def load_data(self, num_proc: int = 1, **kwargs) -> None:
|
|
57
57
|
"""Generic data loader function for original clsd datasets with the format shown in "hf_dataset_link".
|
|
58
58
|
Loading the hf dataset, it populates the following three variables to be used for retrieval evaluation.
|
|
59
59
|
|
|
@@ -54,7 +54,7 @@ class CrossLingualSemanticDiscriminationWMT21(AbsTaskRetrieval):
|
|
|
54
54
|
|
|
55
55
|
num_of_distractors = 4
|
|
56
56
|
|
|
57
|
-
def load_data(self) -> None:
|
|
57
|
+
def load_data(self, num_proc: int = 1, **kwargs) -> None:
|
|
58
58
|
"""Generic data loader function for original clsd datasets with the format shown in "hf_dataset_link".
|
|
59
59
|
Loading the hf dataset, it populates the following three variables to be used for retrieval evaluation.
|
|
60
60
|
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
2
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
|
+
|
|
4
|
+
_LANGUAGES = {
|
|
5
|
+
"en": ["eng-Latn"],
|
|
6
|
+
"fi": ["fin-Latn"],
|
|
7
|
+
"pt": ["por-Latn"],
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class EuroPIRQRetrieval(AbsTaskRetrieval):
    """Multilingual text retrieval task over the EuroPIRQ dataset.

    The class is purely declarative: it only supplies ``metadata`` and does
    not override any loading or evaluation behaviour of ``AbsTaskRetrieval``.
    The evaluated languages come from the module-level ``_LANGUAGES`` mapping.
    """

    metadata = TaskMetadata(
        # --- Identity and data source -------------------------------------
        name="EuroPIRQRetrieval",
        description="The EuroPIRQ retrieval dataset is a multilingual collection designed for evaluating retrieval and cross-lingual retrieval tasks. Dataset contains 10,000 parallel passages & 100 parallel queries (synthetic) in three languages: English, Portuguese, and Finnish, constructed from the European Union's DGT-Acquis corpus.",
        reference="https://huggingface.co/datasets/eherra/EuroPIRQ-retrieval",
        dataset={
            "path": "eherra/EuroPIRQ-retrieval",
            # Fixed revision so every run evaluates the same dataset snapshot.
            "revision": "59225ed25fbcea2185e1acbc8c3c80f1a8cd8341",
        },
        is_public=True,
        # --- Task shape: text queries, text documents ---------------------
        type="Retrieval",
        category="t2t",
        modalities=["text"],
        task_subtypes=[],
        # --- Evaluation setup ---------------------------------------------
        eval_splits=["test"],
        eval_langs=_LANGUAGES,
        main_score="ndcg_at_10",
        # --- Provenance ---------------------------------------------------
        date=("2025-12-01", "2025-12-31"),
        domains=["Legal"],
        license="not specified",
        annotations_creators="LM-generated and reviewed",
        dialect=[],
        sample_creation="found",
        # --- Citation -----------------------------------------------------
        bibtex_citation=r"""
@misc{eherra_2025_europirq,
  author = { {Elias Herranen} },
  publisher = { Hugging Face },
  title = { EuroPIRQ: European Parallel Information Retrieval Queries },
  url = { https://huggingface.co/datasets/eherra/EuroPIRQ-retrieval },
  year = {2025},
}
""",
    )
|
|
@@ -143,7 +143,7 @@ class MIRACLVisionRetrieval(AbsTaskRetrieval):
|
|
|
143
143
|
prompt={"query": "Find a screenshot that is relevant to the user's query."},
|
|
144
144
|
)
|
|
145
145
|
|
|
146
|
-
def load_data(self) -> None:
|
|
146
|
+
def load_data(self, num_proc: int = 1, **kwargs) -> None:
|
|
147
147
|
if self.data_loaded:
|
|
148
148
|
return
|
|
149
149
|
|
|
@@ -30,15 +30,15 @@ def load_ruscibench_data(
|
|
|
30
30
|
|
|
31
31
|
for lang in langs:
|
|
32
32
|
lang_corpus = cast(
|
|
33
|
-
datasets.Dataset,
|
|
33
|
+
"datasets.Dataset",
|
|
34
34
|
datasets.load_dataset(path, f"corpus-{lang}", revision=revision),
|
|
35
35
|
)["corpus"]
|
|
36
36
|
lang_queries = cast(
|
|
37
|
-
datasets.Dataset,
|
|
37
|
+
"datasets.Dataset",
|
|
38
38
|
datasets.load_dataset(path, f"queries-{lang}", revision=revision),
|
|
39
39
|
)["queries"]
|
|
40
40
|
lang_qrels = cast(
|
|
41
|
-
datasets.Dataset,
|
|
41
|
+
"datasets.Dataset",
|
|
42
42
|
datasets.load_dataset(path, f"{lang}", revision=revision),
|
|
43
43
|
)["test"]
|
|
44
44
|
corpus[lang] = {
|
|
@@ -103,7 +103,7 @@ class RuSciBenchCiteRetrieval(AbsTaskRetrieval):
|
|
|
103
103
|
},
|
|
104
104
|
)
|
|
105
105
|
|
|
106
|
-
def load_data(self) -> None:
|
|
106
|
+
def load_data(self, num_proc: int = 1, **kwargs) -> None:
|
|
107
107
|
if self.data_loaded:
|
|
108
108
|
return
|
|
109
109
|
|
|
@@ -161,7 +161,7 @@ class RuSciBenchCociteRetrieval(AbsTaskRetrieval):
|
|
|
161
161
|
},
|
|
162
162
|
)
|
|
163
163
|
|
|
164
|
-
def load_data(self) -> None:
|
|
164
|
+
def load_data(self, num_proc: int = 1, **kwargs) -> None:
|
|
165
165
|
if self.data_loaded:
|
|
166
166
|
return
|
|
167
167
|
|
|
@@ -16,6 +16,7 @@ def _load_data(
|
|
|
16
16
|
splits: list[str],
|
|
17
17
|
langs: list | None = None,
|
|
18
18
|
revision: str | None = None,
|
|
19
|
+
num_proc: int = 1,
|
|
19
20
|
):
|
|
20
21
|
if langs is None:
|
|
21
22
|
corpus = {}
|
|
@@ -32,6 +33,7 @@ def _load_data(
|
|
|
32
33
|
"queries",
|
|
33
34
|
split=split,
|
|
34
35
|
revision=revision,
|
|
36
|
+
num_proc=num_proc,
|
|
35
37
|
)
|
|
36
38
|
query_ds = query_ds.map(
|
|
37
39
|
lambda x: {
|
|
@@ -40,6 +42,7 @@ def _load_data(
|
|
|
40
42
|
"modality": "text",
|
|
41
43
|
},
|
|
42
44
|
remove_columns=["query-id", "query"],
|
|
45
|
+
num_proc=num_proc,
|
|
43
46
|
)
|
|
44
47
|
|
|
45
48
|
corpus_ds = load_dataset(
|
|
@@ -47,6 +50,7 @@ def _load_data(
|
|
|
47
50
|
"corpus",
|
|
48
51
|
split=split,
|
|
49
52
|
revision=revision,
|
|
53
|
+
num_proc=num_proc,
|
|
50
54
|
)
|
|
51
55
|
corpus_ds = corpus_ds.map(
|
|
52
56
|
lambda x: {
|
|
@@ -54,6 +58,7 @@ def _load_data(
|
|
|
54
58
|
"modality": "image",
|
|
55
59
|
},
|
|
56
60
|
remove_columns=["corpus-id"],
|
|
61
|
+
num_proc=num_proc,
|
|
57
62
|
)
|
|
58
63
|
corpus_ds = corpus_ds.select_columns(["id", "image"])
|
|
59
64
|
|
|
@@ -62,6 +67,7 @@ def _load_data(
|
|
|
62
67
|
"qrels",
|
|
63
68
|
split=split,
|
|
64
69
|
revision=revision,
|
|
70
|
+
num_proc=num_proc,
|
|
65
71
|
)
|
|
66
72
|
|
|
67
73
|
if langs is None:
|
|
@@ -125,7 +131,7 @@ class Vidore2ESGReportsRetrieval(AbsTaskRetrieval):
|
|
|
125
131
|
prompt={"query": "Find a screenshot that relevant to the user's question."},
|
|
126
132
|
)
|
|
127
133
|
|
|
128
|
-
def load_data(self) -> None:
|
|
134
|
+
def load_data(self, num_proc: int = 1, **kwargs) -> None:
|
|
129
135
|
if self.data_loaded:
|
|
130
136
|
return
|
|
131
137
|
|
|
@@ -134,6 +140,7 @@ class Vidore2ESGReportsRetrieval(AbsTaskRetrieval):
|
|
|
134
140
|
splits=self.metadata.eval_splits,
|
|
135
141
|
langs=_LANGS.keys(),
|
|
136
142
|
revision=self.metadata.dataset["revision"],
|
|
143
|
+
num_proc=num_proc,
|
|
137
144
|
)
|
|
138
145
|
|
|
139
146
|
self.data_loaded = True
|
|
@@ -172,7 +179,7 @@ class Vidore2EconomicsReportsRetrieval(AbsTaskRetrieval):
|
|
|
172
179
|
prompt={"query": "Find a screenshot that relevant to the user's question."},
|
|
173
180
|
)
|
|
174
181
|
|
|
175
|
-
def load_data(self) -> None:
|
|
182
|
+
def load_data(self, num_proc: int = 1, **kwargs) -> None:
|
|
176
183
|
if self.data_loaded:
|
|
177
184
|
return
|
|
178
185
|
|
|
@@ -181,6 +188,7 @@ class Vidore2EconomicsReportsRetrieval(AbsTaskRetrieval):
|
|
|
181
188
|
splits=self.metadata.eval_splits,
|
|
182
189
|
langs=_LANGS.keys(),
|
|
183
190
|
revision=self.metadata.dataset["revision"],
|
|
191
|
+
num_proc=num_proc,
|
|
184
192
|
)
|
|
185
193
|
|
|
186
194
|
self.data_loaded = True
|
|
@@ -219,7 +227,7 @@ class Vidore2BioMedicalLecturesRetrieval(AbsTaskRetrieval):
|
|
|
219
227
|
prompt={"query": "Find a screenshot that relevant to the user's question."},
|
|
220
228
|
)
|
|
221
229
|
|
|
222
|
-
def load_data(self) -> None:
|
|
230
|
+
def load_data(self, num_proc: int = 1, **kwargs) -> None:
|
|
223
231
|
if self.data_loaded:
|
|
224
232
|
return
|
|
225
233
|
|
|
@@ -228,6 +236,7 @@ class Vidore2BioMedicalLecturesRetrieval(AbsTaskRetrieval):
|
|
|
228
236
|
splits=self.metadata.eval_splits,
|
|
229
237
|
langs=_LANGS.keys(),
|
|
230
238
|
revision=self.metadata.dataset["revision"],
|
|
239
|
+
num_proc=num_proc,
|
|
231
240
|
)
|
|
232
241
|
|
|
233
242
|
self.data_loaded = True
|
|
@@ -266,7 +275,7 @@ class Vidore2ESGReportsHLRetrieval(AbsTaskRetrieval):
|
|
|
266
275
|
prompt={"query": "Find a screenshot that relevant to the user's question."},
|
|
267
276
|
)
|
|
268
277
|
|
|
269
|
-
def load_data(self) -> None:
|
|
278
|
+
def load_data(self, num_proc: int = 1, **kwargs) -> None:
|
|
270
279
|
if self.data_loaded:
|
|
271
280
|
return
|
|
272
281
|
|
|
@@ -274,6 +283,7 @@ class Vidore2ESGReportsHLRetrieval(AbsTaskRetrieval):
|
|
|
274
283
|
path=self.metadata.dataset["path"],
|
|
275
284
|
splits=self.metadata.eval_splits,
|
|
276
285
|
revision=self.metadata.dataset["revision"],
|
|
286
|
+
num_proc=num_proc,
|
|
277
287
|
)
|
|
278
288
|
|
|
279
289
|
self.data_loaded = True
|