mteb 2.1.4__py3-none-any.whl → 2.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/__init__.py +6 -0
- mteb/_create_dataloaders.py +22 -20
- mteb/_evaluators/any_sts_evaluator.py +23 -14
- mteb/_evaluators/classification_metrics.py +54 -0
- mteb/_evaluators/clustering_evaluator.py +3 -3
- mteb/_evaluators/evaluator.py +4 -2
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +18 -11
- mteb/_evaluators/pair_classification_evaluator.py +34 -40
- mteb/_evaluators/retrieval_evaluator.py +2 -2
- mteb/_evaluators/retrieval_metrics.py +18 -17
- mteb/_evaluators/sklearn_evaluator.py +25 -37
- mteb/_evaluators/text/bitext_mining_evaluator.py +31 -19
- mteb/_evaluators/text/summarization_evaluator.py +27 -20
- mteb/_evaluators/zeroshot_classification_evaluator.py +7 -5
- mteb/abstasks/_data_filter/__init__.py +0 -0
- mteb/abstasks/_data_filter/filters.py +125 -0
- mteb/abstasks/_data_filter/task_pipelines.py +105 -0
- mteb/abstasks/_statistics_calculation.py +23 -11
- mteb/abstasks/_stratification.py +18 -18
- mteb/abstasks/abstask.py +35 -28
- mteb/abstasks/aggregate_task_metadata.py +1 -9
- mteb/abstasks/aggregated_task.py +10 -29
- mteb/abstasks/classification.py +15 -12
- mteb/abstasks/clustering.py +20 -16
- mteb/abstasks/clustering_legacy.py +13 -10
- mteb/abstasks/image/image_text_pair_classification.py +7 -4
- mteb/abstasks/multilabel_classification.py +33 -22
- mteb/abstasks/pair_classification.py +27 -11
- mteb/abstasks/regression.py +4 -4
- mteb/abstasks/retrieval.py +28 -24
- mteb/abstasks/retrieval_dataset_loaders.py +2 -2
- mteb/abstasks/sts.py +14 -4
- mteb/abstasks/task_metadata.py +32 -33
- mteb/abstasks/text/bitext_mining.py +39 -28
- mteb/abstasks/text/reranking.py +8 -6
- mteb/abstasks/text/summarization.py +10 -5
- mteb/abstasks/zeroshot_classification.py +8 -4
- mteb/benchmarks/_create_table.py +84 -37
- mteb/benchmarks/benchmark.py +77 -16
- mteb/benchmarks/benchmarks/__init__.py +12 -0
- mteb/benchmarks/benchmarks/benchmarks.py +361 -16
- mteb/benchmarks/get_benchmark.py +14 -53
- mteb/cache.py +227 -37
- mteb/cli/_display_tasks.py +2 -2
- mteb/cli/build_cli.py +110 -14
- mteb/cli/generate_model_card.py +43 -23
- mteb/deprecated_evaluator.py +71 -62
- mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
- mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
- mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
- mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
- mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
- mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
- mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
- mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
- mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
- mteb/evaluate.py +106 -75
- mteb/filter_tasks.py +25 -26
- mteb/get_tasks.py +29 -30
- mteb/languages/language_scripts.py +5 -3
- mteb/leaderboard/app.py +414 -151
- mteb/leaderboard/benchmark_selector.py +14 -5
- mteb/leaderboard/figures.py +13 -15
- mteb/leaderboard/table.py +82 -17
- mteb/load_results.py +12 -12
- mteb/models/__init__.py +4 -1
- mteb/models/abs_encoder.py +31 -23
- mteb/models/cache_wrappers/__init__.py +2 -1
- mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +7 -6
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
- mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
- mteb/models/cache_wrappers/cache_wrapper.py +3 -3
- mteb/models/get_model_meta.py +25 -118
- mteb/models/instruct_wrapper.py +33 -9
- mteb/models/model_implementations/align_models.py +8 -1
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +65 -0
- mteb/models/model_implementations/ara_models.py +9 -1
- mteb/models/model_implementations/arctic_models.py +16 -8
- mteb/models/model_implementations/b1ade_models.py +2 -1
- mteb/models/model_implementations/bedrock_models.py +4 -0
- mteb/models/model_implementations/bge_models.py +101 -17
- mteb/models/model_implementations/bica_model.py +35 -0
- mteb/models/model_implementations/blip2_models.py +13 -2
- mteb/models/model_implementations/blip_models.py +43 -16
- mteb/models/model_implementations/bm25.py +5 -4
- mteb/models/model_implementations/bmretriever_models.py +10 -4
- mteb/models/model_implementations/cadet_models.py +10 -1
- mteb/models/model_implementations/cde_models.py +25 -4
- mteb/models/model_implementations/clip_models.py +9 -6
- mteb/models/model_implementations/clips_models.py +100 -0
- mteb/models/model_implementations/codefuse_models.py +165 -3
- mteb/models/model_implementations/codesage_models.py +18 -3
- mteb/models/model_implementations/cohere_models.py +13 -6
- mteb/models/model_implementations/cohere_v.py +7 -2
- mteb/models/model_implementations/colpali_models.py +17 -9
- mteb/models/model_implementations/colqwen_models.py +275 -5
- mteb/models/model_implementations/colsmol_models.py +4 -2
- mteb/models/model_implementations/conan_models.py +2 -1
- mteb/models/model_implementations/dino_models.py +194 -23
- mteb/models/model_implementations/e5_instruct.py +27 -4
- mteb/models/model_implementations/e5_models.py +21 -110
- mteb/models/model_implementations/e5_v.py +7 -6
- mteb/models/model_implementations/eagerworks_models.py +164 -0
- mteb/models/model_implementations/emillykkejensen_models.py +91 -0
- mteb/models/model_implementations/en_code_retriever.py +2 -1
- mteb/models/model_implementations/euler_models.py +32 -0
- mteb/models/model_implementations/evaclip_models.py +4 -0
- mteb/models/model_implementations/fa_models.py +67 -9
- mteb/models/model_implementations/facebookai.py +205 -0
- mteb/models/model_implementations/geogpt_models.py +2 -1
- mteb/models/model_implementations/gme_v_models.py +17 -10
- mteb/models/model_implementations/google_models.py +17 -6
- mteb/models/model_implementations/granite_vision_embedding_models.py +8 -3
- mteb/models/model_implementations/gritlm_models.py +4 -2
- mteb/models/model_implementations/gte_models.py +99 -9
- mteb/models/model_implementations/hinvec_models.py +2 -1
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +36 -6
- mteb/models/model_implementations/inf_models.py +4 -2
- mteb/models/model_implementations/jasper_models.py +256 -3
- mteb/models/model_implementations/jina_clip.py +49 -10
- mteb/models/model_implementations/jina_models.py +222 -11
- mteb/models/model_implementations/kalm_models.py +203 -25
- mteb/models/model_implementations/kblab.py +37 -0
- mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
- mteb/models/model_implementations/kfst.py +25 -0
- mteb/models/model_implementations/kowshik24_models.py +32 -0
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +2 -1
- mteb/models/model_implementations/linq_models.py +4 -3
- mteb/models/model_implementations/listconranker.py +2 -2
- mteb/models/model_implementations/llm2clip_models.py +9 -6
- mteb/models/model_implementations/llm2vec_models.py +16 -8
- mteb/models/model_implementations/mcinext_models.py +7 -1
- mteb/models/model_implementations/mdbr_models.py +19 -3
- mteb/models/model_implementations/misc_models.py +422 -60
- mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
- mteb/models/model_implementations/mme5_models.py +2 -1
- mteb/models/model_implementations/moco_models.py +15 -4
- mteb/models/model_implementations/mod_models.py +191 -0
- mteb/models/model_implementations/model2vec_models.py +27 -14
- mteb/models/model_implementations/moka_models.py +4 -1
- mteb/models/model_implementations/nbailab.py +70 -0
- mteb/models/model_implementations/no_instruct_sentence_models.py +3 -2
- mteb/models/model_implementations/nomic_models.py +173 -6
- mteb/models/model_implementations/nomic_models_vision.py +8 -3
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +32 -19
- mteb/models/model_implementations/nvidia_models.py +155 -20
- mteb/models/model_implementations/octen_models.py +254 -0
- mteb/models/model_implementations/openai_models.py +20 -16
- mteb/models/model_implementations/openclip_models.py +37 -13
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -5
- mteb/models/model_implementations/ops_moa_models.py +5 -3
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
- mteb/models/model_implementations/pawan_models.py +39 -0
- mteb/models/model_implementations/piccolo_models.py +9 -1
- mteb/models/model_implementations/pixie_models.py +56 -0
- mteb/models/model_implementations/promptriever_models.py +12 -8
- mteb/models/model_implementations/pylate_models.py +46 -12
- mteb/models/model_implementations/qodo_models.py +4 -2
- mteb/models/model_implementations/qtack_models.py +2 -1
- mteb/models/model_implementations/qwen3_models.py +9 -6
- mteb/models/model_implementations/qzhou_models.py +5 -3
- mteb/models/model_implementations/random_baseline.py +19 -24
- mteb/models/model_implementations/rasgaard_models.py +34 -0
- mteb/models/model_implementations/reasonir_model.py +2 -1
- mteb/models/model_implementations/repllama_models.py +5 -3
- mteb/models/model_implementations/rerankers_custom.py +15 -9
- mteb/models/model_implementations/rerankers_monot5_based.py +31 -31
- mteb/models/model_implementations/richinfoai_models.py +2 -1
- mteb/models/model_implementations/ru_sentence_models.py +71 -20
- mteb/models/model_implementations/ruri_models.py +322 -0
- mteb/models/model_implementations/salesforce_models.py +6 -3
- mteb/models/model_implementations/samilpwc_models.py +2 -1
- mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
- mteb/models/model_implementations/searchmap_models.py +2 -1
- mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +625 -0
- mteb/models/model_implementations/seed_models.py +1 -0
- mteb/models/model_implementations/sentence_transformers_models.py +177 -18
- mteb/models/model_implementations/shuu_model.py +32 -31
- mteb/models/model_implementations/siglip_models.py +30 -20
- mteb/models/model_implementations/slm_models.py +416 -0
- mteb/models/model_implementations/sonar_models.py +1 -0
- mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
- mteb/models/model_implementations/stella_models.py +23 -4
- mteb/models/model_implementations/tarka_models.py +376 -0
- mteb/models/model_implementations/text2vec_models.py +9 -3
- mteb/models/model_implementations/ua_sentence_models.py +11 -1
- mteb/models/model_implementations/uae_models.py +8 -1
- mteb/models/model_implementations/vdr_models.py +3 -1
- mteb/models/model_implementations/vi_vn_models.py +45 -6
- mteb/models/model_implementations/vista_models.py +2 -0
- mteb/models/model_implementations/vlm2vec_models.py +5 -3
- mteb/models/model_implementations/voyage_models.py +99 -0
- mteb/models/model_implementations/voyage_v.py +17 -9
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +2 -1
- mteb/models/model_implementations/yuan_models.py +34 -0
- mteb/models/model_implementations/yuan_models_en.py +58 -0
- mteb/models/model_meta.py +498 -29
- mteb/models/models_protocols.py +22 -6
- mteb/models/search_encoder_index/__init__.py +7 -0
- mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
- mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +160 -0
- mteb/models/search_wrappers.py +197 -65
- mteb/models/sentence_transformer_wrapper.py +52 -32
- mteb/models/vllm_wrapper.py +327 -0
- mteb/py.typed +0 -0
- mteb/results/benchmark_results.py +114 -65
- mteb/results/model_result.py +63 -26
- mteb/results/task_result.py +117 -77
- mteb/similarity_functions.py +60 -7
- mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
- mteb/tasks/classification/ara/ajgt.py +1 -2
- mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
- mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
- mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
- mteb/tasks/classification/dan/dk_hate_classification.py +2 -3
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
- mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
- mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
- mteb/tasks/classification/eng/arxiv_classification.py +1 -2
- mteb/tasks/classification/eng/banking77_classification.py +1 -2
- mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
- mteb/tasks/classification/eng/emotion_classification.py +1 -2
- mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
- mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
- mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
- mteb/tasks/classification/eng/imdb_classification.py +1 -2
- mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
- mteb/tasks/classification/eng/news_classification.py +1 -2
- mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
- mteb/tasks/classification/eng/patent_classification.py +1 -2
- mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
- mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
- mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
- mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
- mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
- mteb/tasks/classification/eng/ucf101_classification.py +1 -5
- mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
- mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
- mteb/tasks/classification/est/estonian_valence.py +2 -3
- mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
- mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
- mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
- mteb/tasks/classification/fra/french_book_reviews.py +1 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
- mteb/tasks/classification/heb/__init__.py +6 -1
- mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
- mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
- mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
- mteb/tasks/classification/jpn/wrime_classification.py +1 -2
- mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
- mteb/tasks/classification/kor/klue_tc.py +1 -2
- mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +3 -4
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
- mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
- mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
- mteb/tasks/classification/multilingual/scala_classification.py +2 -3
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
- mteb/tasks/classification/mya/myanmar_news.py +1 -2
- mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
- mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
- mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
- mteb/tasks/classification/nld/iconclass_classification.py +3 -0
- mteb/tasks/classification/nld/open_tender_classification.py +3 -0
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
- mteb/tasks/classification/nob/no_rec_classification.py +1 -2
- mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
- mteb/tasks/classification/ory/odia_news_classification.py +1 -2
- mteb/tasks/classification/pol/polish_classification.py +3 -6
- mteb/tasks/classification/ron/moroco.py +1 -2
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
- mteb/tasks/classification/rus/georeview_classification.py +1 -2
- mteb/tasks/classification/rus/headline_classification.py +1 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
- mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
- mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
- mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
- mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
- mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
- mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
- mteb/tasks/classification/swe/dalaj_classification.py +1 -2
- mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
- mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
- mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
- mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
- mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
- mteb/tasks/classification/tur/__init__.py +4 -0
- mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
- mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
- mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
- mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
- mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
- mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
- mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
- mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
- mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
- mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
- mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
- mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
- mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
- mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
- mteb/tasks/classification/zho/cmteb_classification.py +5 -10
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
- mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
- mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
- mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
- mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
- mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
- mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
- mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
- mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
- mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
- mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
- mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
- mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
- mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
- mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
- mteb/tasks/pair_classification/rus/__init__.py +2 -2
- mteb/tasks/pair_classification/rus/terra.py +51 -25
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
- mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
- mteb/tasks/reranking/jpn/__init__.py +9 -1
- mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
- mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
- mteb/tasks/reranking/multilingual/__init__.py +2 -0
- mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
- mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
- mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
- mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
- mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
- mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
- mteb/tasks/retrieval/code/code_rag.py +12 -12
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
- mteb/tasks/retrieval/eng/__init__.py +2 -0
- mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
- mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
- mteb/tasks/retrieval/jpn/__init__.py +8 -0
- mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
- mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
- mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
- mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
- mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
- mteb/tasks/retrieval/kor/__init__.py +16 -1
- mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
- mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
- mteb/tasks/retrieval/multilingual/__init__.py +24 -0
- mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
- mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
- mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
- mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
- mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +389 -0
- mteb/tasks/retrieval/nld/__init__.py +8 -4
- mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
- mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
- mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
- mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
- mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
- mteb/tasks/retrieval/nob/norquad.py +2 -2
- mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
- mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
- mteb/tasks/retrieval/vie/__init__.py +14 -6
- mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +40 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +40 -5
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +40 -7
- mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +40 -6
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +49 -5
- mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +40 -5
- mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
- mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
- mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
- mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
- mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
- mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
- mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
- mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
- mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
- mteb/types/__init__.py +2 -0
- mteb/types/_encoder_io.py +19 -2
- mteb/types/_result.py +2 -1
- mteb/types/statistics.py +9 -3
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/METADATA +25 -8
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/RECORD +525 -438
- mteb/models/model_implementations/mxbai_models.py +0 -102
- mteb/models/model_implementations/nb_sbert.py +0 -25
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
|
@@ -48,14 +48,14 @@ class CodeRAGProgrammingSolutionsRetrieval(AbsTaskRetrieval):
|
|
|
48
48
|
"path": "code-rag-bench/programming-solutions",
|
|
49
49
|
"revision": "1064f7bba54d5400d4836f5831fe4c2332a566a6",
|
|
50
50
|
},
|
|
51
|
-
**common_args,
|
|
51
|
+
**common_args,
|
|
52
52
|
)
|
|
53
53
|
|
|
54
54
|
def load_data(self) -> None:
|
|
55
55
|
"""Load dataset from HuggingFace hub"""
|
|
56
56
|
if self.data_loaded:
|
|
57
57
|
return
|
|
58
|
-
self.dataset = datasets.load_dataset(**self.metadata.dataset)
|
|
58
|
+
self.dataset = datasets.load_dataset(**self.metadata.dataset)
|
|
59
59
|
self.dataset_transform()
|
|
60
60
|
self.data_loaded = True
|
|
61
61
|
|
|
@@ -71,7 +71,7 @@ class CodeRAGProgrammingSolutionsRetrieval(AbsTaskRetrieval):
|
|
|
71
71
|
self.queries = {}
|
|
72
72
|
|
|
73
73
|
split = self.metadata.eval_splits[0]
|
|
74
|
-
ds: datasets.Dataset = self.dataset[split]
|
|
74
|
+
ds: datasets.Dataset = self.dataset[split]
|
|
75
75
|
ds = ds.shuffle(seed=42)
|
|
76
76
|
|
|
77
77
|
self.queries[split] = {}
|
|
@@ -105,14 +105,14 @@ class CodeRAGOnlineTutorialsRetrieval(AbsTaskRetrieval):
|
|
|
105
105
|
"path": "code-rag-bench/online-tutorials",
|
|
106
106
|
"revision": "095bb77130082e4690d6c3a031997b03487bf6e2",
|
|
107
107
|
},
|
|
108
|
-
**common_args,
|
|
108
|
+
**common_args,
|
|
109
109
|
)
|
|
110
110
|
|
|
111
111
|
def load_data(self) -> None:
|
|
112
112
|
"""Load dataset from HuggingFace hub"""
|
|
113
113
|
if self.data_loaded:
|
|
114
114
|
return
|
|
115
|
-
self.dataset = datasets.load_dataset(**self.metadata.dataset)
|
|
115
|
+
self.dataset = datasets.load_dataset(**self.metadata.dataset)
|
|
116
116
|
self.dataset_transform()
|
|
117
117
|
self.data_loaded = True
|
|
118
118
|
|
|
@@ -128,7 +128,7 @@ class CodeRAGOnlineTutorialsRetrieval(AbsTaskRetrieval):
|
|
|
128
128
|
self.queries = {}
|
|
129
129
|
|
|
130
130
|
split = self.metadata.eval_splits[0]
|
|
131
|
-
ds: datasets.Dataset = self.dataset[split]
|
|
131
|
+
ds: datasets.Dataset = self.dataset[split]
|
|
132
132
|
ds = ds.shuffle(seed=42)
|
|
133
133
|
|
|
134
134
|
self.queries[split] = {}
|
|
@@ -165,14 +165,14 @@ class CodeRAGLibraryDocumentationSolutionsRetrieval(AbsTaskRetrieval):
|
|
|
165
165
|
"path": "code-rag-bench/library-documentation",
|
|
166
166
|
"revision": "b530d3b5a25087d2074e731b76232db85b9e9107",
|
|
167
167
|
},
|
|
168
|
-
**common_args,
|
|
168
|
+
**common_args,
|
|
169
169
|
)
|
|
170
170
|
|
|
171
171
|
def load_data(self) -> None:
|
|
172
172
|
"""Load dataset from HuggingFace hub"""
|
|
173
173
|
if self.data_loaded:
|
|
174
174
|
return
|
|
175
|
-
self.dataset = datasets.load_dataset(**self.metadata.dataset)
|
|
175
|
+
self.dataset = datasets.load_dataset(**self.metadata.dataset)
|
|
176
176
|
self.dataset_transform()
|
|
177
177
|
self.data_loaded = True
|
|
178
178
|
|
|
@@ -188,7 +188,7 @@ class CodeRAGLibraryDocumentationSolutionsRetrieval(AbsTaskRetrieval):
|
|
|
188
188
|
self.queries = {}
|
|
189
189
|
|
|
190
190
|
split = self.metadata.eval_splits[0]
|
|
191
|
-
ds: datasets.Dataset = self.dataset[split]
|
|
191
|
+
ds: datasets.Dataset = self.dataset[split]
|
|
192
192
|
ds = ds.shuffle(seed=42)
|
|
193
193
|
|
|
194
194
|
self.queries[split] = {}
|
|
@@ -222,14 +222,14 @@ class CodeRAGStackoverflowPostsRetrieval(AbsTaskRetrieval):
|
|
|
222
222
|
"path": "code-rag-bench/stackoverflow-posts",
|
|
223
223
|
"revision": "04e05d86cb0ac467b29a5d87f4c56eac99dfc0a4",
|
|
224
224
|
},
|
|
225
|
-
**common_args,
|
|
225
|
+
**common_args,
|
|
226
226
|
)
|
|
227
227
|
|
|
228
228
|
def load_data(self) -> None:
|
|
229
229
|
"""Load dataset from HuggingFace hub"""
|
|
230
230
|
if self.data_loaded:
|
|
231
231
|
return
|
|
232
|
-
self.dataset = datasets.load_dataset(**self.metadata.dataset)
|
|
232
|
+
self.dataset = datasets.load_dataset(**self.metadata.dataset)
|
|
233
233
|
self.dataset_transform()
|
|
234
234
|
self.data_loaded = True
|
|
235
235
|
|
|
@@ -245,7 +245,7 @@ class CodeRAGStackoverflowPostsRetrieval(AbsTaskRetrieval):
|
|
|
245
245
|
self.queries = {}
|
|
246
246
|
|
|
247
247
|
split = self.metadata.eval_splits[0]
|
|
248
|
-
ds: datasets.Dataset = self.dataset[split]
|
|
248
|
+
ds: datasets.Dataset = self.dataset[split]
|
|
249
249
|
ds = ds.shuffle(seed=42)
|
|
250
250
|
|
|
251
251
|
self.queries[split] = {}
|
|
@@ -25,11 +25,14 @@ class FreshStackRetrieval(AbsTaskRetrieval):
|
|
|
25
25
|
dialect=[],
|
|
26
26
|
sample_creation="found",
|
|
27
27
|
bibtex_citation=r"""
|
|
28
|
-
@
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
28
|
+
@misc{thakur2025freshstackbuildingrealisticbenchmarks,
|
|
29
|
+
archiveprefix = {arXiv},
|
|
30
|
+
author = {Nandan Thakur and Jimmy Lin and Sam Havens and Michael Carbin and Omar Khattab and Andrew Drozdov},
|
|
31
|
+
eprint = {2504.13128},
|
|
32
|
+
primaryclass = {cs.IR},
|
|
33
|
+
title = {FreshStack: Building Realistic Benchmarks for Evaluating Retrieval on Technical Documents},
|
|
34
|
+
url = {https://arxiv.org/abs/2504.13128},
|
|
35
|
+
year = {2025},
|
|
33
36
|
}
|
|
34
37
|
""",
|
|
35
38
|
)
|
|
@@ -51,7 +51,7 @@ Derczynski, Leon},
|
|
|
51
51
|
"""Load dataset from HuggingFace hub"""
|
|
52
52
|
if self.data_loaded:
|
|
53
53
|
return
|
|
54
|
-
self.dataset = datasets.load_dataset(**self.metadata.dataset)
|
|
54
|
+
self.dataset = datasets.load_dataset(**self.metadata.dataset)
|
|
55
55
|
self.dataset_transform()
|
|
56
56
|
self.data_loaded = True
|
|
57
57
|
|
|
@@ -64,7 +64,7 @@ Piperidis, Stelios},
|
|
|
64
64
|
"""Load dataset from HuggingFace hub"""
|
|
65
65
|
if self.data_loaded:
|
|
66
66
|
return
|
|
67
|
-
self.dataset = datasets.load_dataset(**self.metadata.dataset)
|
|
67
|
+
self.dataset = datasets.load_dataset(**self.metadata.dataset)
|
|
68
68
|
self.dataset_transform()
|
|
69
69
|
self.data_loaded = True
|
|
70
70
|
|
|
@@ -81,7 +81,7 @@ Piperidis, Stelios},
|
|
|
81
81
|
text2id = {}
|
|
82
82
|
|
|
83
83
|
for split in self.dataset:
|
|
84
|
-
ds: datasets.Dataset = self.dataset[split]
|
|
84
|
+
ds: datasets.Dataset = self.dataset[split]
|
|
85
85
|
ds = ds.shuffle(seed=42)
|
|
86
86
|
ds = ds.select(
|
|
87
87
|
range(2048)
|
|
@@ -40,7 +40,7 @@ class TwitterHjerneRetrieval(AbsTaskRetrieval):
|
|
|
40
40
|
"""Load dataset from HuggingFace hub"""
|
|
41
41
|
if self.data_loaded:
|
|
42
42
|
return
|
|
43
|
-
self.dataset = datasets.load_dataset(**self.metadata.dataset)
|
|
43
|
+
self.dataset = datasets.load_dataset(**self.metadata.dataset)
|
|
44
44
|
self.dataset_transform()
|
|
45
45
|
self.data_loaded = True
|
|
46
46
|
|
|
@@ -57,7 +57,7 @@ class TwitterHjerneRetrieval(AbsTaskRetrieval):
|
|
|
57
57
|
text2id = {}
|
|
58
58
|
|
|
59
59
|
for split in self.dataset:
|
|
60
|
-
ds: datasets.Dataset = self.dataset[split]
|
|
60
|
+
ds: datasets.Dataset = self.dataset[split]
|
|
61
61
|
ds = ds.map(answers_to_list)
|
|
62
62
|
|
|
63
63
|
self.queries[split] = {}
|
|
@@ -18,6 +18,7 @@ from .built_bench_retrieval import BuiltBenchRetrieval
|
|
|
18
18
|
from .chat_doctor_retrieval import ChatDoctorRetrieval
|
|
19
19
|
from .chem_hotpot_qa_retrieval import ChemHotpotQARetrieval
|
|
20
20
|
from .chem_nq_retrieval import ChemNQRetrieval
|
|
21
|
+
from .chemrxiv import ChemRxivRetrieval
|
|
21
22
|
from .cirr_it2i_retrieval import CIRRIT2IRetrieval
|
|
22
23
|
from .climate_fever_retrieval import (
|
|
23
24
|
ClimateFEVER,
|
|
@@ -254,6 +255,7 @@ __all__ = [
|
|
|
254
255
|
"ChatDoctorRetrieval",
|
|
255
256
|
"ChemHotpotQARetrieval",
|
|
256
257
|
"ChemNQRetrieval",
|
|
258
|
+
"ChemRxivRetrieval",
|
|
257
259
|
"ClimateFEVER",
|
|
258
260
|
"ClimateFEVERHardNegatives",
|
|
259
261
|
"ClimateFEVERHardNegativesV2",
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
2
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class ChemRxivRetrieval(AbsTaskRetrieval):
|
|
6
|
+
metadata = TaskMetadata(
|
|
7
|
+
name="ChemRxivRetrieval",
|
|
8
|
+
dataset={
|
|
9
|
+
"path": "BASF-AI/ChemRxivRetrieval",
|
|
10
|
+
"revision": "5377aa18f309ec440ff6325a4c2cd3362c2cb8d7",
|
|
11
|
+
},
|
|
12
|
+
description="A retrieval task based on ChemRxiv papers where queries are LLM-synthesized to match specific paragraphs.",
|
|
13
|
+
reference="https://arxiv.org/abs/2508.01643",
|
|
14
|
+
type="Retrieval",
|
|
15
|
+
category="t2t",
|
|
16
|
+
modalities=["text"],
|
|
17
|
+
eval_splits=["test"],
|
|
18
|
+
eval_langs=["eng-Latn"],
|
|
19
|
+
main_score="ndcg_at_10",
|
|
20
|
+
date=("2025-01-01", "2025-05-01"),
|
|
21
|
+
domains=["Chemistry"],
|
|
22
|
+
task_subtypes=["Question answering", "Article retrieval"],
|
|
23
|
+
license="cc-by-nc-sa-4.0",
|
|
24
|
+
annotations_creators="LM-generated and reviewed",
|
|
25
|
+
dialect=[],
|
|
26
|
+
sample_creation="found",
|
|
27
|
+
bibtex_citation="""@article{kasmaee2025chembed,
|
|
28
|
+
author = {Kasmaee, Ali Shiraee and Khodadad, Mohammad and Astaraki, Mahdi and Saloot, Mohammad Arshi and Sherck, Nicholas and Mahyar, Hamidreza and Samiee, Soheila},
|
|
29
|
+
journal = {arXiv preprint arXiv:2508.01643},
|
|
30
|
+
title = {Chembed: Enhancing chemical literature search through domain-specific text embeddings},
|
|
31
|
+
year = {2025},
|
|
32
|
+
}""",
|
|
33
|
+
)
|
|
@@ -25,7 +25,7 @@ class CUB200I2I(AbsTaskRetrieval):
|
|
|
25
25
|
modalities=["image"],
|
|
26
26
|
sample_creation="created",
|
|
27
27
|
bibtex_citation=r"""
|
|
28
|
-
@article{
|
|
28
|
+
@article{welinder2010caltech,
|
|
29
29
|
author = {Welinder, Peter and Branson, Steve and Mita, Takeshi and Wah, Catherine and Schroff, Florian and Belongie, Serge and Perona, Pietro},
|
|
30
30
|
month = {09},
|
|
31
31
|
pages = {},
|
|
@@ -7,14 +7,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
|
|
|
7
7
|
class LitSearchRetrieval(AbsTaskRetrieval):
|
|
8
8
|
metadata = TaskMetadata(
|
|
9
9
|
name="LitSearchRetrieval",
|
|
10
|
-
description=""
|
|
11
|
-
The dataset contains the query set and retrieval corpus for the paper LitSearch: A Retrieval Benchmark for
|
|
12
|
-
Scientific Literature Search. It introduces LitSearch, a retrieval benchmark comprising 597 realistic literature
|
|
13
|
-
search queries about recent ML and NLP papers. LitSearch is constructed using a combination of (1) questions
|
|
14
|
-
generated by GPT-4 based on paragraphs containing inline citations from research papers and (2) questions about
|
|
15
|
-
recently published papers, manually written by their authors. All LitSearch questions were manually examined or
|
|
16
|
-
edited by experts to ensure high quality.
|
|
17
|
-
""",
|
|
10
|
+
description="The dataset contains the query set and retrieval corpus for the paper LitSearch: A Retrieval Benchmark for Scientific Literature Search. It introduces LitSearch, a retrieval benchmark comprising 597 realistic literature search queries about recent ML and NLP papers. LitSearch is constructed using a combination of (1) questions generated by GPT-4 based on paragraphs containing inline citations from research papers and (2) questions about recently published papers, manually written by their authors. All LitSearch questions were manually examined or edited by experts to ensure high quality.",
|
|
18
11
|
reference="https://github.com/princeton-nlp/LitSearch",
|
|
19
12
|
dataset={
|
|
20
13
|
"path": "princeton-nlp/LitSearch",
|
|
@@ -351,6 +351,7 @@ class VidoreSyntheticDocQAAIRetrieval(AbsTaskRetrieval):
|
|
|
351
351
|
}
|
|
352
352
|
""",
|
|
353
353
|
prompt={"query": "Find a screenshot that relevant to the user's question."},
|
|
354
|
+
adapted_from=["VidoreDocVQARetrieval"],
|
|
354
355
|
)
|
|
355
356
|
|
|
356
357
|
def load_data(self) -> None:
|
|
@@ -394,6 +395,7 @@ class VidoreSyntheticDocQAEnergyRetrieval(AbsTaskRetrieval):
|
|
|
394
395
|
}
|
|
395
396
|
""",
|
|
396
397
|
prompt={"query": "Find a screenshot that relevant to the user's question."},
|
|
398
|
+
adapted_from=["VidoreDocVQARetrieval"],
|
|
397
399
|
)
|
|
398
400
|
|
|
399
401
|
def load_data(self) -> None:
|
|
@@ -437,6 +439,7 @@ class VidoreSyntheticDocQAGovernmentReportsRetrieval(AbsTaskRetrieval):
|
|
|
437
439
|
}
|
|
438
440
|
""",
|
|
439
441
|
prompt={"query": "Find a screenshot that relevant to the user's question."},
|
|
442
|
+
adapted_from=["VidoreDocVQARetrieval"],
|
|
440
443
|
)
|
|
441
444
|
|
|
442
445
|
def load_data(self) -> None:
|
|
@@ -480,6 +483,7 @@ class VidoreSyntheticDocQAHealthcareIndustryRetrieval(AbsTaskRetrieval):
|
|
|
480
483
|
}
|
|
481
484
|
""",
|
|
482
485
|
prompt={"query": "Find a screenshot that relevant to the user's question."},
|
|
486
|
+
adapted_from=["VidoreDocVQARetrieval"],
|
|
483
487
|
)
|
|
484
488
|
|
|
485
489
|
def load_data(self) -> None:
|
|
@@ -1,8 +1,12 @@
|
|
|
1
1
|
from .ja_cwir_retrieval import JaCWIRRetrieval
|
|
2
|
+
from .ja_cwir_retrieval_lite import JaCWIRRetrievalLite
|
|
2
3
|
from .ja_gov_faqs_retrieval import JaGovFaqsRetrieval
|
|
3
4
|
from .ja_qu_ad_retrieval import JaQuADRetrieval
|
|
4
5
|
from .japanese_legal1_retrieval import JapaneseLegal1Retrieval
|
|
5
6
|
from .jaqket_retrieval import JaqketRetrieval
|
|
7
|
+
from .jaqket_retrieval_lite import JaqketRetrievalLite
|
|
8
|
+
from .miracl_ja_retrieval_lite import MIRACLJaRetrievalLite
|
|
9
|
+
from .mr_tydi_ja_retrieval_lite import MrTyDiJaRetrievalLite
|
|
6
10
|
from .nlp_journal_abs_article_retrieval import (
|
|
7
11
|
NLPJournalAbsArticleRetrieval,
|
|
8
12
|
NLPJournalAbsArticleRetrievalV2,
|
|
@@ -22,10 +26,14 @@ from .nlp_journal_title_intro_retrieval import (
|
|
|
22
26
|
|
|
23
27
|
__all__ = [
|
|
24
28
|
"JaCWIRRetrieval",
|
|
29
|
+
"JaCWIRRetrievalLite",
|
|
25
30
|
"JaGovFaqsRetrieval",
|
|
26
31
|
"JaQuADRetrieval",
|
|
27
32
|
"JapaneseLegal1Retrieval",
|
|
28
33
|
"JaqketRetrieval",
|
|
34
|
+
"JaqketRetrievalLite",
|
|
35
|
+
"MIRACLJaRetrievalLite",
|
|
36
|
+
"MrTyDiJaRetrievalLite",
|
|
29
37
|
"NLPJournalAbsArticleRetrieval",
|
|
30
38
|
"NLPJournalAbsArticleRetrievalV2",
|
|
31
39
|
"NLPJournalAbsIntroRetrieval",
|
|
@@ -9,10 +9,7 @@ class JaCWIRRetrieval(AbsTaskRetrieval):
|
|
|
9
9
|
|
|
10
10
|
metadata = TaskMetadata(
|
|
11
11
|
name="JaCWIRRetrieval",
|
|
12
|
-
description="
|
|
13
|
-
5000 question texts and approximately 500k web page titles and web page introductions or summaries
|
|
14
|
-
(meta descriptions, etc.). The question texts are created based on one of the 500k web pages,
|
|
15
|
-
and that data is used as a positive example for the question text.""",
|
|
12
|
+
description="JaCWIR is a small-scale Japanese information retrieval evaluation dataset consisting of 5000 question texts and approximately 500k web page titles and web page introductions or summaries (meta descriptions, etc.). The question texts are created based on one of the 500k web pages, and that data is used as a positive example for the question text.",
|
|
16
13
|
reference="https://huggingface.co/datasets/hotchpotch/JaCWIR",
|
|
17
14
|
dataset={
|
|
18
15
|
"path": "mteb/JaCWIRRetrieval",
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
2
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class JaCWIRRetrievalLite(AbsTaskRetrieval):
|
|
6
|
+
metadata = TaskMetadata(
|
|
7
|
+
name="JaCWIRRetrievalLite",
|
|
8
|
+
dataset={
|
|
9
|
+
"path": "mteb/JaCWIRRetrievalLite",
|
|
10
|
+
"revision": "79472b360242cf2692e24a6d9999ef50d350d672",
|
|
11
|
+
},
|
|
12
|
+
description=(
|
|
13
|
+
"JaCWIR (Japanese Casual Web IR) is a dataset consisting of questions and webpage meta descriptions "
|
|
14
|
+
"collected from Hatena Bookmark. This is the lightweight version with a reduced corpus "
|
|
15
|
+
"(302,638 documents) constructed using hard negatives from 5 high-performance models."
|
|
16
|
+
),
|
|
17
|
+
reference="https://huggingface.co/datasets/hotchpotch/JaCWIR",
|
|
18
|
+
type="Retrieval",
|
|
19
|
+
category="t2t",
|
|
20
|
+
modalities=["text"],
|
|
21
|
+
eval_splits=["test"],
|
|
22
|
+
eval_langs=["jpn-Jpan"],
|
|
23
|
+
main_score="ndcg_at_10",
|
|
24
|
+
date=("2020-01-01", "2025-01-01"),
|
|
25
|
+
domains=["Web", "Written"],
|
|
26
|
+
task_subtypes=["Article retrieval"],
|
|
27
|
+
license="not specified",
|
|
28
|
+
annotations_creators="derived",
|
|
29
|
+
dialect=[],
|
|
30
|
+
sample_creation="found",
|
|
31
|
+
adapted_from=["JaCWIRRetrieval"],
|
|
32
|
+
bibtex_citation=r"""
|
|
33
|
+
@misc{jmteb_lite,
|
|
34
|
+
author = {Li, Shengzhe and Ohagi, Masaya and Ri, Ryokan and Fukuchi, Akihiko and Shibata, Tomohide
|
|
35
|
+
and Kawahara, Daisuke},
|
|
36
|
+
howpublished = {\url{https://huggingface.co/datasets/sbintuitions/JMTEB-lite}},
|
|
37
|
+
title = {{J}{M}{T}{E}{B}-lite: {T}he {L}ightweight {V}ersion of {JMTEB}},
|
|
38
|
+
year = {2025},
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
@misc{yuichi-tateno-2024-jacwir,
|
|
42
|
+
author = {Yuichi Tateno},
|
|
43
|
+
title = {JaCWIR: Japanese Casual Web IR - 日本語情報検索評価のための小規模でカジュアルなWebタイトルと概要のデータセット},
|
|
44
|
+
url = {https://huggingface.co/datasets/hotchpotch/JaCWIR},
|
|
45
|
+
}
|
|
46
|
+
""",
|
|
47
|
+
)
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
2
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class JaqketRetrievalLite(AbsTaskRetrieval):
|
|
6
|
+
metadata = TaskMetadata(
|
|
7
|
+
name="JaqketRetrievalLite",
|
|
8
|
+
dataset={
|
|
9
|
+
"path": "mteb/JaqketRetrievalLite",
|
|
10
|
+
"revision": "860965fbb6526dd8edff12627dacf07c8f5a54f3",
|
|
11
|
+
},
|
|
12
|
+
description=(
|
|
13
|
+
"JAQKET (JApanese Questions on Knowledge of EnTities) is a QA dataset created based on quiz questions. "
|
|
14
|
+
"This is the lightweight version with a reduced corpus (65,802 documents) constructed using "
|
|
15
|
+
"hard negatives from 5 high-performance models."
|
|
16
|
+
),
|
|
17
|
+
reference="https://github.com/kumapo/JAQKET-dataset",
|
|
18
|
+
type="Retrieval",
|
|
19
|
+
category="t2t",
|
|
20
|
+
modalities=["text"],
|
|
21
|
+
eval_splits=["test"],
|
|
22
|
+
eval_langs=["jpn-Jpan"],
|
|
23
|
+
main_score="ndcg_at_10",
|
|
24
|
+
date=("2023-10-09", "2025-01-01"),
|
|
25
|
+
domains=["Encyclopaedic", "Non-fiction", "Written"],
|
|
26
|
+
task_subtypes=["Question answering"],
|
|
27
|
+
license="cc-by-sa-4.0",
|
|
28
|
+
annotations_creators="human-annotated",
|
|
29
|
+
dialect=[],
|
|
30
|
+
sample_creation="found",
|
|
31
|
+
adapted_from=["JaqketRetrieval"],
|
|
32
|
+
bibtex_citation=r"""
|
|
33
|
+
@misc{jmteb_lite,
|
|
34
|
+
author = {Li, Shengzhe and Ohagi, Masaya and Ri, Ryokan and Fukuchi, Akihiko and Shibata, Tomohide
|
|
35
|
+
and Kawahara, Daisuke},
|
|
36
|
+
howpublished = {\url{https://huggingface.co/datasets/sbintuitions/JMTEB-lite}},
|
|
37
|
+
title = {{J}{M}{T}{E}{B}-lite: {T}he {L}ightweight {V}ersion of {JMTEB}},
|
|
38
|
+
year = {2025},
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
@inproceedings{Kurihara_nlp2020,
|
|
42
|
+
author = {鈴木正敏 and 鈴木潤 and 松田耕史 and ⻄田京介 and 井之上直也},
|
|
43
|
+
booktitle = {言語処理学会第26回年次大会},
|
|
44
|
+
note = {in Japanese},
|
|
45
|
+
title = {JAQKET: クイズを題材にした日本語 QA データセットの構築},
|
|
46
|
+
url = {https://www.anlp.jp/proceedings/annual_meeting/2020/pdf_dir/P2-24.pdf},
|
|
47
|
+
year = {2020},
|
|
48
|
+
}
|
|
49
|
+
""",
|
|
50
|
+
)
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
2
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class MIRACLJaRetrievalLite(AbsTaskRetrieval):
|
|
6
|
+
metadata = TaskMetadata(
|
|
7
|
+
name="MIRACLJaRetrievalLite",
|
|
8
|
+
dataset={
|
|
9
|
+
"path": "mteb/MIRACLJaRetrievalLite",
|
|
10
|
+
"revision": "575c225da29d1f5fec01082afa56f35df0f44295",
|
|
11
|
+
},
|
|
12
|
+
description=(
|
|
13
|
+
"MIRACL (Multilingual Information Retrieval Across a Continuum of Languages) is a multilingual "
|
|
14
|
+
"retrieval dataset. This is the lightweight Japanese version with a reduced corpus (105,064 documents) "
|
|
15
|
+
"constructed using hard negatives from 5 high-performance models."
|
|
16
|
+
),
|
|
17
|
+
reference="https://project-miracl.github.io/",
|
|
18
|
+
type="Retrieval",
|
|
19
|
+
category="t2t",
|
|
20
|
+
modalities=["text"],
|
|
21
|
+
eval_splits=["test"],
|
|
22
|
+
eval_langs=["jpn-Jpan"],
|
|
23
|
+
main_score="ndcg_at_10",
|
|
24
|
+
date=("2022-06-01", "2025-01-01"),
|
|
25
|
+
domains=["Encyclopaedic", "Written"],
|
|
26
|
+
task_subtypes=[],
|
|
27
|
+
license="apache-2.0",
|
|
28
|
+
annotations_creators="expert-annotated",
|
|
29
|
+
dialect=[],
|
|
30
|
+
sample_creation="created",
|
|
31
|
+
adapted_from=["MIRACLRetrieval"],
|
|
32
|
+
bibtex_citation=r"""
|
|
33
|
+
@article{10.1162/tacl_a_00595,
|
|
34
|
+
author = {Zhang, Xinyu and Thakur, Nandan and Ogundepo, Odunayo and Kamalloo, Ehsan and Alfonso-Hermelo, David
|
|
35
|
+
and Li, Xiaoguang and Liu, Qun and Rezagholizadeh, Mehdi and Lin, Jimmy},
|
|
36
|
+
doi = {10.1162/tacl_a_00595},
|
|
37
|
+
journal = {Transactions of the Association for Computational Linguistics},
|
|
38
|
+
pages = {1114-1131},
|
|
39
|
+
title = {{MIRACL: A Multilingual Retrieval Dataset Covering 18 Diverse Languages}},
|
|
40
|
+
volume = {11},
|
|
41
|
+
year = {2023},
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
@misc{jmteb_lite,
|
|
45
|
+
author = {Li, Shengzhe and Ohagi, Masaya and Ri, Ryokan and Fukuchi, Akihiko and Shibata, Tomohide
|
|
46
|
+
and Kawahara, Daisuke},
|
|
47
|
+
howpublished = {\url{https://huggingface.co/datasets/sbintuitions/JMTEB-lite}},
|
|
48
|
+
title = {{J}{M}{T}{E}{B}-lite: {T}he {L}ightweight {V}ersion of {JMTEB}},
|
|
49
|
+
year = {2025},
|
|
50
|
+
}
|
|
51
|
+
""",
|
|
52
|
+
)
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
2
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class MrTyDiJaRetrievalLite(AbsTaskRetrieval):
|
|
6
|
+
metadata = TaskMetadata(
|
|
7
|
+
name="MrTyDiJaRetrievalLite",
|
|
8
|
+
dataset={
|
|
9
|
+
"path": "mteb/MrTyDiJaRetrievalLite",
|
|
10
|
+
"revision": "b87e6ff25f4e32d1c97498a539ea8aad5fde3cb1",
|
|
11
|
+
},
|
|
12
|
+
description=(
|
|
13
|
+
"Mr.TyDi is a multilingual benchmark dataset built on TyDi for document retrieval tasks. "
|
|
14
|
+
"This is the lightweight Japanese version with a reduced corpus (93,382 documents) constructed using "
|
|
15
|
+
"hard negatives from 5 high-performance models."
|
|
16
|
+
),
|
|
17
|
+
reference="https://huggingface.co/datasets/castorini/mr-tydi",
|
|
18
|
+
type="Retrieval",
|
|
19
|
+
category="t2t",
|
|
20
|
+
modalities=["text"],
|
|
21
|
+
eval_splits=["test"],
|
|
22
|
+
eval_langs=["jpn-Jpan"],
|
|
23
|
+
main_score="ndcg_at_10",
|
|
24
|
+
date=("2021-01-01", "2025-01-01"),
|
|
25
|
+
domains=["Encyclopaedic", "Non-fiction", "Written"],
|
|
26
|
+
task_subtypes=["Question answering"],
|
|
27
|
+
license="apache-2.0",
|
|
28
|
+
annotations_creators="human-annotated",
|
|
29
|
+
dialect=[],
|
|
30
|
+
sample_creation="found",
|
|
31
|
+
adapted_from=["MrTidyRetrieval"],
|
|
32
|
+
bibtex_citation=r"""
|
|
33
|
+
@misc{jmteb_lite,
|
|
34
|
+
author = {Li, Shengzhe and Ohagi, Masaya and Ri, Ryokan and Fukuchi, Akihiko and Shibata, Tomohide
|
|
35
|
+
and Kawahara, Daisuke},
|
|
36
|
+
howpublished = {\url{https://huggingface.co/datasets/sbintuitions/JMTEB-lite}},
|
|
37
|
+
title = {{J}{M}{T}{E}{B}-lite: {T}he {L}ightweight {V}ersion of {JMTEB}},
|
|
38
|
+
year = {2025},
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
@article{mrtydi,
|
|
42
|
+
author = {Xinyu Zhang and Xueguang Ma and Peng Shi and Jimmy Lin},
|
|
43
|
+
journal = {arXiv:2108.08787},
|
|
44
|
+
title = {{Mr. TyDi}: A Multi-lingual Benchmark for Dense Retrieval},
|
|
45
|
+
year = {2021},
|
|
46
|
+
}
|
|
47
|
+
""",
|
|
48
|
+
)
|
|
@@ -46,10 +46,17 @@ class GeorgianFAQRetrieval(AbsTaskRetrieval):
|
|
|
46
46
|
split=_EVAL_SPLIT,
|
|
47
47
|
revision=self.metadata.dataset["revision"],
|
|
48
48
|
)
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
}
|
|
52
|
-
|
|
49
|
+
|
|
50
|
+
question_ids = {}
|
|
51
|
+
answer_ids = {}
|
|
52
|
+
|
|
53
|
+
for row in data:
|
|
54
|
+
question = row["question"]
|
|
55
|
+
answer = row["answer"]
|
|
56
|
+
if question not in question_ids:
|
|
57
|
+
question_ids[question] = len(question_ids)
|
|
58
|
+
if answer not in answer_ids:
|
|
59
|
+
answer_ids[answer] = len(answer_ids)
|
|
53
60
|
|
|
54
61
|
for row in data:
|
|
55
62
|
question = row["question"]
|
|
@@ -1,4 +1,19 @@
|
|
|
1
1
|
from .auto_rag_retrieval import AutoRAGRetrieval
|
|
2
2
|
from .ko_strategy_qa import KoStrategyQA
|
|
3
|
+
from .kovidore2_bench_retrieval import (
|
|
4
|
+
KoVidore2CybersecurityRetrieval,
|
|
5
|
+
KoVidore2EconomicRetrieval,
|
|
6
|
+
KoVidore2EnergyRetrieval,
|
|
7
|
+
KoVidore2HrRetrieval,
|
|
8
|
+
)
|
|
9
|
+
from .squad_kor_v1_retrieval import SQuADKorV1Retrieval
|
|
3
10
|
|
|
4
|
-
__all__ = [
|
|
11
|
+
__all__ = [
|
|
12
|
+
"AutoRAGRetrieval",
|
|
13
|
+
"KoStrategyQA",
|
|
14
|
+
"KoVidore2CybersecurityRetrieval",
|
|
15
|
+
"KoVidore2EconomicRetrieval",
|
|
16
|
+
"KoVidore2EnergyRetrieval",
|
|
17
|
+
"KoVidore2HrRetrieval",
|
|
18
|
+
"SQuADKorV1Retrieval",
|
|
19
|
+
]
|