mteb 2.1.4__py3-none-any.whl → 2.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/__init__.py +6 -0
- mteb/_create_dataloaders.py +22 -20
- mteb/_evaluators/any_sts_evaluator.py +23 -14
- mteb/_evaluators/classification_metrics.py +54 -0
- mteb/_evaluators/clustering_evaluator.py +3 -3
- mteb/_evaluators/evaluator.py +4 -2
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +18 -11
- mteb/_evaluators/pair_classification_evaluator.py +34 -40
- mteb/_evaluators/retrieval_evaluator.py +2 -2
- mteb/_evaluators/retrieval_metrics.py +18 -17
- mteb/_evaluators/sklearn_evaluator.py +25 -37
- mteb/_evaluators/text/bitext_mining_evaluator.py +31 -19
- mteb/_evaluators/text/summarization_evaluator.py +27 -20
- mteb/_evaluators/zeroshot_classification_evaluator.py +7 -5
- mteb/abstasks/_data_filter/__init__.py +0 -0
- mteb/abstasks/_data_filter/filters.py +125 -0
- mteb/abstasks/_data_filter/task_pipelines.py +105 -0
- mteb/abstasks/_statistics_calculation.py +23 -11
- mteb/abstasks/_stratification.py +18 -18
- mteb/abstasks/abstask.py +35 -28
- mteb/abstasks/aggregate_task_metadata.py +1 -9
- mteb/abstasks/aggregated_task.py +10 -29
- mteb/abstasks/classification.py +15 -12
- mteb/abstasks/clustering.py +20 -16
- mteb/abstasks/clustering_legacy.py +13 -10
- mteb/abstasks/image/image_text_pair_classification.py +7 -4
- mteb/abstasks/multilabel_classification.py +33 -22
- mteb/abstasks/pair_classification.py +27 -11
- mteb/abstasks/regression.py +4 -4
- mteb/abstasks/retrieval.py +28 -24
- mteb/abstasks/retrieval_dataset_loaders.py +2 -2
- mteb/abstasks/sts.py +14 -4
- mteb/abstasks/task_metadata.py +32 -33
- mteb/abstasks/text/bitext_mining.py +39 -28
- mteb/abstasks/text/reranking.py +8 -6
- mteb/abstasks/text/summarization.py +10 -5
- mteb/abstasks/zeroshot_classification.py +8 -4
- mteb/benchmarks/_create_table.py +84 -37
- mteb/benchmarks/benchmark.py +77 -16
- mteb/benchmarks/benchmarks/__init__.py +12 -0
- mteb/benchmarks/benchmarks/benchmarks.py +361 -16
- mteb/benchmarks/get_benchmark.py +14 -53
- mteb/cache.py +227 -37
- mteb/cli/_display_tasks.py +2 -2
- mteb/cli/build_cli.py +110 -14
- mteb/cli/generate_model_card.py +43 -23
- mteb/deprecated_evaluator.py +71 -62
- mteb/descriptive_stats/BitextMining/RuSciBenchBitextMining.v2.json +61 -0
- mteb/descriptive_stats/Classification/HebrewSentimentAnalysis.v3.json +60 -0
- mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
- mteb/descriptive_stats/PairClassification/TERRa.V2.json +35 -0
- mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
- mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
- mteb/descriptive_stats/Reranking/MultiLongDocReranking.json +466 -0
- mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
- mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
- mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
- mteb/evaluate.py +106 -75
- mteb/filter_tasks.py +25 -26
- mteb/get_tasks.py +29 -30
- mteb/languages/language_scripts.py +5 -3
- mteb/leaderboard/app.py +414 -151
- mteb/leaderboard/benchmark_selector.py +14 -5
- mteb/leaderboard/figures.py +13 -15
- mteb/leaderboard/table.py +82 -17
- mteb/load_results.py +12 -12
- mteb/models/__init__.py +4 -1
- mteb/models/abs_encoder.py +31 -23
- mteb/models/cache_wrappers/__init__.py +2 -1
- mteb/models/cache_wrappers/cache_backend_protocol.py +3 -5
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +7 -6
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +6 -2
- mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
- mteb/models/cache_wrappers/cache_wrapper.py +3 -3
- mteb/models/get_model_meta.py +25 -118
- mteb/models/instruct_wrapper.py +33 -9
- mteb/models/model_implementations/align_models.py +8 -1
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +65 -0
- mteb/models/model_implementations/ara_models.py +9 -1
- mteb/models/model_implementations/arctic_models.py +16 -8
- mteb/models/model_implementations/b1ade_models.py +2 -1
- mteb/models/model_implementations/bedrock_models.py +4 -0
- mteb/models/model_implementations/bge_models.py +101 -17
- mteb/models/model_implementations/bica_model.py +35 -0
- mteb/models/model_implementations/blip2_models.py +13 -2
- mteb/models/model_implementations/blip_models.py +43 -16
- mteb/models/model_implementations/bm25.py +5 -4
- mteb/models/model_implementations/bmretriever_models.py +10 -4
- mteb/models/model_implementations/cadet_models.py +10 -1
- mteb/models/model_implementations/cde_models.py +25 -4
- mteb/models/model_implementations/clip_models.py +9 -6
- mteb/models/model_implementations/clips_models.py +100 -0
- mteb/models/model_implementations/codefuse_models.py +165 -3
- mteb/models/model_implementations/codesage_models.py +18 -3
- mteb/models/model_implementations/cohere_models.py +13 -6
- mteb/models/model_implementations/cohere_v.py +7 -2
- mteb/models/model_implementations/colpali_models.py +17 -9
- mteb/models/model_implementations/colqwen_models.py +275 -5
- mteb/models/model_implementations/colsmol_models.py +4 -2
- mteb/models/model_implementations/conan_models.py +2 -1
- mteb/models/model_implementations/dino_models.py +194 -23
- mteb/models/model_implementations/e5_instruct.py +27 -4
- mteb/models/model_implementations/e5_models.py +21 -110
- mteb/models/model_implementations/e5_v.py +7 -6
- mteb/models/model_implementations/eagerworks_models.py +164 -0
- mteb/models/model_implementations/emillykkejensen_models.py +91 -0
- mteb/models/model_implementations/en_code_retriever.py +2 -1
- mteb/models/model_implementations/euler_models.py +32 -0
- mteb/models/model_implementations/evaclip_models.py +4 -0
- mteb/models/model_implementations/fa_models.py +67 -9
- mteb/models/model_implementations/facebookai.py +205 -0
- mteb/models/model_implementations/geogpt_models.py +2 -1
- mteb/models/model_implementations/gme_v_models.py +17 -10
- mteb/models/model_implementations/google_models.py +17 -6
- mteb/models/model_implementations/granite_vision_embedding_models.py +8 -3
- mteb/models/model_implementations/gritlm_models.py +4 -2
- mteb/models/model_implementations/gte_models.py +99 -9
- mteb/models/model_implementations/hinvec_models.py +2 -1
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +36 -6
- mteb/models/model_implementations/inf_models.py +4 -2
- mteb/models/model_implementations/jasper_models.py +256 -3
- mteb/models/model_implementations/jina_clip.py +49 -10
- mteb/models/model_implementations/jina_models.py +222 -11
- mteb/models/model_implementations/kalm_models.py +203 -25
- mteb/models/model_implementations/kblab.py +37 -0
- mteb/models/model_implementations/kennethenevoldsen_models.py +74 -0
- mteb/models/model_implementations/kfst.py +25 -0
- mteb/models/model_implementations/kowshik24_models.py +32 -0
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +2 -1
- mteb/models/model_implementations/linq_models.py +4 -3
- mteb/models/model_implementations/listconranker.py +2 -2
- mteb/models/model_implementations/llm2clip_models.py +9 -6
- mteb/models/model_implementations/llm2vec_models.py +16 -8
- mteb/models/model_implementations/mcinext_models.py +7 -1
- mteb/models/model_implementations/mdbr_models.py +19 -3
- mteb/models/model_implementations/misc_models.py +422 -60
- mteb/models/model_implementations/mixedbread_ai_models.py +332 -0
- mteb/models/model_implementations/mme5_models.py +2 -1
- mteb/models/model_implementations/moco_models.py +15 -4
- mteb/models/model_implementations/mod_models.py +191 -0
- mteb/models/model_implementations/model2vec_models.py +27 -14
- mteb/models/model_implementations/moka_models.py +4 -1
- mteb/models/model_implementations/nbailab.py +70 -0
- mteb/models/model_implementations/no_instruct_sentence_models.py +3 -2
- mteb/models/model_implementations/nomic_models.py +173 -6
- mteb/models/model_implementations/nomic_models_vision.py +8 -3
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +32 -19
- mteb/models/model_implementations/nvidia_models.py +155 -20
- mteb/models/model_implementations/octen_models.py +254 -0
- mteb/models/model_implementations/openai_models.py +20 -16
- mteb/models/model_implementations/openclip_models.py +37 -13
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +10 -5
- mteb/models/model_implementations/ops_moa_models.py +5 -3
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
- mteb/models/model_implementations/pawan_models.py +39 -0
- mteb/models/model_implementations/piccolo_models.py +9 -1
- mteb/models/model_implementations/pixie_models.py +56 -0
- mteb/models/model_implementations/promptriever_models.py +12 -8
- mteb/models/model_implementations/pylate_models.py +46 -12
- mteb/models/model_implementations/qodo_models.py +4 -2
- mteb/models/model_implementations/qtack_models.py +2 -1
- mteb/models/model_implementations/qwen3_models.py +9 -6
- mteb/models/model_implementations/qzhou_models.py +5 -3
- mteb/models/model_implementations/random_baseline.py +19 -24
- mteb/models/model_implementations/rasgaard_models.py +34 -0
- mteb/models/model_implementations/reasonir_model.py +2 -1
- mteb/models/model_implementations/repllama_models.py +5 -3
- mteb/models/model_implementations/rerankers_custom.py +15 -9
- mteb/models/model_implementations/rerankers_monot5_based.py +31 -31
- mteb/models/model_implementations/richinfoai_models.py +2 -1
- mteb/models/model_implementations/ru_sentence_models.py +71 -20
- mteb/models/model_implementations/ruri_models.py +322 -0
- mteb/models/model_implementations/salesforce_models.py +6 -3
- mteb/models/model_implementations/samilpwc_models.py +2 -1
- mteb/models/model_implementations/sarashina_embedding_models.py +168 -0
- mteb/models/model_implementations/searchmap_models.py +2 -1
- mteb/models/model_implementations/seed_1_6_embedding_models.py +8 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +625 -0
- mteb/models/model_implementations/seed_models.py +1 -0
- mteb/models/model_implementations/sentence_transformers_models.py +177 -18
- mteb/models/model_implementations/shuu_model.py +32 -31
- mteb/models/model_implementations/siglip_models.py +30 -20
- mteb/models/model_implementations/slm_models.py +416 -0
- mteb/models/model_implementations/sonar_models.py +1 -0
- mteb/models/model_implementations/spartan8806_atles_champion.py +34 -0
- mteb/models/model_implementations/stella_models.py +23 -4
- mteb/models/model_implementations/tarka_models.py +376 -0
- mteb/models/model_implementations/text2vec_models.py +9 -3
- mteb/models/model_implementations/ua_sentence_models.py +11 -1
- mteb/models/model_implementations/uae_models.py +8 -1
- mteb/models/model_implementations/vdr_models.py +3 -1
- mteb/models/model_implementations/vi_vn_models.py +45 -6
- mteb/models/model_implementations/vista_models.py +2 -0
- mteb/models/model_implementations/vlm2vec_models.py +5 -3
- mteb/models/model_implementations/voyage_models.py +99 -0
- mteb/models/model_implementations/voyage_v.py +17 -9
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +2 -1
- mteb/models/model_implementations/yuan_models.py +34 -0
- mteb/models/model_implementations/yuan_models_en.py +58 -0
- mteb/models/model_meta.py +498 -29
- mteb/models/models_protocols.py +22 -6
- mteb/models/search_encoder_index/__init__.py +7 -0
- mteb/models/search_encoder_index/search_backend_protocol.py +50 -0
- mteb/models/search_encoder_index/search_indexes/__init__.py +5 -0
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +160 -0
- mteb/models/search_wrappers.py +197 -65
- mteb/models/sentence_transformer_wrapper.py +52 -32
- mteb/models/vllm_wrapper.py +327 -0
- mteb/py.typed +0 -0
- mteb/results/benchmark_results.py +114 -65
- mteb/results/model_result.py +63 -26
- mteb/results/task_result.py +117 -77
- mteb/similarity_functions.py +60 -7
- mteb/tasks/bitext_mining/multilingual/__init__.py +2 -1
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +4 -2
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining_fast.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +47 -5
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
- mteb/tasks/classification/ara/ajgt.py +1 -2
- mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
- mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
- mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
- mteb/tasks/classification/dan/dk_hate_classification.py +2 -3
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
- mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
- mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
- mteb/tasks/classification/eng/arxiv_classification.py +1 -2
- mteb/tasks/classification/eng/banking77_classification.py +1 -2
- mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
- mteb/tasks/classification/eng/emotion_classification.py +1 -2
- mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
- mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
- mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
- mteb/tasks/classification/eng/imdb_classification.py +1 -2
- mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
- mteb/tasks/classification/eng/news_classification.py +1 -2
- mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
- mteb/tasks/classification/eng/patent_classification.py +1 -2
- mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
- mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
- mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
- mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
- mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
- mteb/tasks/classification/eng/ucf101_classification.py +1 -5
- mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
- mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
- mteb/tasks/classification/est/estonian_valence.py +2 -3
- mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
- mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
- mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
- mteb/tasks/classification/fra/french_book_reviews.py +1 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
- mteb/tasks/classification/heb/__init__.py +6 -1
- mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +62 -4
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
- mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
- mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
- mteb/tasks/classification/jpn/wrime_classification.py +1 -2
- mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
- mteb/tasks/classification/kor/klue_tc.py +1 -2
- mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +3 -4
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
- mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
- mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
- mteb/tasks/classification/multilingual/scala_classification.py +2 -3
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
- mteb/tasks/classification/mya/myanmar_news.py +1 -2
- mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
- mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
- mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
- mteb/tasks/classification/nld/iconclass_classification.py +3 -0
- mteb/tasks/classification/nld/open_tender_classification.py +3 -0
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
- mteb/tasks/classification/nob/no_rec_classification.py +1 -2
- mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
- mteb/tasks/classification/ory/odia_news_classification.py +1 -2
- mteb/tasks/classification/pol/polish_classification.py +3 -6
- mteb/tasks/classification/ron/moroco.py +1 -2
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
- mteb/tasks/classification/rus/georeview_classification.py +1 -2
- mteb/tasks/classification/rus/headline_classification.py +1 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
- mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
- mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
- mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
- mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
- mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
- mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
- mteb/tasks/classification/swe/dalaj_classification.py +1 -2
- mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
- mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
- mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
- mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
- mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
- mteb/tasks/classification/tur/__init__.py +4 -0
- mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
- mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
- mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
- mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
- mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
- mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
- mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
- mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
- mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
- mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
- mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
- mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
- mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
- mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
- mteb/tasks/classification/zho/cmteb_classification.py +5 -10
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
- mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
- mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
- mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
- mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
- mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
- mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
- mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +1 -1
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
- mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
- mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
- mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
- mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
- mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
- mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
- mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
- mteb/tasks/pair_classification/rus/__init__.py +2 -2
- mteb/tasks/pair_classification/rus/terra.py +51 -25
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
- mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
- mteb/tasks/reranking/jpn/__init__.py +9 -1
- mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
- mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
- mteb/tasks/reranking/multilingual/__init__.py +2 -0
- mteb/tasks/reranking/multilingual/multi_long_doc_reranking.py +70 -0
- mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
- mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
- mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
- mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
- mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
- mteb/tasks/retrieval/code/code_rag.py +12 -12
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
- mteb/tasks/retrieval/eng/__init__.py +2 -0
- mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
- mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +4 -0
- mteb/tasks/retrieval/jpn/__init__.py +8 -0
- mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
- mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
- mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
- mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
- mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
- mteb/tasks/retrieval/kor/__init__.py +16 -1
- mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
- mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
- mteb/tasks/retrieval/multilingual/__init__.py +24 -0
- mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
- mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +56 -42
- mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
- mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
- mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +4 -2
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +389 -0
- mteb/tasks/retrieval/nld/__init__.py +8 -4
- mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
- mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
- mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
- mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
- mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
- mteb/tasks/retrieval/nob/norquad.py +2 -2
- mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
- mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
- mteb/tasks/retrieval/vie/__init__.py +14 -6
- mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +40 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +40 -5
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +40 -7
- mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/green_node_table_markdown_retrieval.py +16 -1
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +40 -6
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +49 -5
- mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +40 -5
- mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
- mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
- mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
- mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
- mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
- mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
- mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
- mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
- mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
- mteb/types/__init__.py +2 -0
- mteb/types/_encoder_io.py +19 -2
- mteb/types/_result.py +2 -1
- mteb/types/statistics.py +9 -3
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/METADATA +25 -8
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/RECORD +525 -438
- mteb/models/model_implementations/mxbai_models.py +0 -102
- mteb/models/model_implementations/nb_sbert.py +0 -25
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/WHEEL +0 -0
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/entry_points.txt +0 -0
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.1.4.dist-info → mteb-2.7.2.dist-info}/top_level.txt +0 -0
--- a/mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py
+++ b/mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py
@@ -1,30 +1,26 @@
 from mteb.abstasks.retrieval import AbsTaskRetrieval
 from mteb.abstasks.task_metadata import TaskMetadata
 
-[… 20 removed lines (old 4–23) not captured in this extract …]
-        annotations_creators="derived",
-        dialect=[],
-        sample_creation="machine-translated and verified",  # manually checked a small subset
-        bibtex_citation=r"""
+_sci_fact_nl_metadata = dict(
+    dataset={
+        "path": "clips/beir-nl-scifact",
+        "revision": "856d8dfc294b138856bbf3042450e3782321e44e",
+    },
+    reference="https://huggingface.co/datasets/clips/beir-nl-scifact",
+    type="Retrieval",
+    category="t2t",
+    modalities=["text"],
+    eval_splits=["test"],
+    eval_langs=["nld-Latn"],
+    main_score="ndcg_at_10",
+    date=("2020-05-01", "2020-05-01"),  # best guess: based on submission date
+    domains=["Academic", "Medical", "Written"],
+    task_subtypes=[],
+    license="cc-by-4.0",
+    annotations_creators="derived",
+    dialect=[],
+    sample_creation="machine-translated and verified",  # manually checked a small subset
+    bibtex_citation=r"""
 @misc{banar2024beirnlzeroshotinformationretrieval,
   archiveprefix = {arXiv},
   author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans},
@@ -35,5 +31,27 @@ class SciFactNL(AbsTaskRetrieval):
   year = {2024},
 }
 """,
+)
+
+
+class SciFactNL(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="SciFact-NL",
+        description="SciFactNL verifies scientific claims in Dutch using evidence from the research literature "
+        "containing scientific paper abstracts.",
         adapted_from=["SciFact"],
+        **_sci_fact_nl_metadata,
+    )
+
+
+class SciFactNLv2(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="SciFact-NL.v2",
+        description="SciFactNL verifies scientific claims in Dutch using evidence from the research literature "
+        "containing scientific paper abstracts. This version adds a Dutch prompt to the dataset.",
+        adapted_from=["SciFact-NL"],
+        prompt={
+            "query": "Given a scientific claim, retrieve documents that support or refute the claim"
+        },
+        **_sci_fact_nl_metadata,
     )
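Both hunks above restructure the file around a shared, module-level metadata dict that is unpacked into each `TaskMetadata(...)` call, so the v1 and v2 task classes differ only in name, description, `adapted_from`, and prompt; the same refactor is applied to `scidocsnl_retrieval.py` below. A minimal sketch of the unpacking pattern, using a hypothetical stand-in dataclass rather than mteb's real `TaskMetadata` (whose full field set is not visible in this diff):

```python
from dataclasses import dataclass
from typing import Optional

# Stand-in for mteb.abstasks.task_metadata.TaskMetadata; illustrative only.
@dataclass
class TaskMetadata:
    name: str
    type: str
    main_score: str
    prompt: Optional[dict] = None

# Shared fields live in one module-level dict ...
_shared_metadata = dict(type="Retrieval", main_score="ndcg_at_10")

# ... and are unpacked into every per-version instance, so a dataset
# revision bump or license fix is edited in exactly one place.
v1 = TaskMetadata(name="SciFact-NL", **_shared_metadata)
v2 = TaskMetadata(
    name="SciFact-NL.v2",
    prompt={"query": "Given a scientific claim, retrieve documents that support or refute the claim"},
    **_shared_metadata,
)
```
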
--- a/mteb/tasks/retrieval/nld/scidocsnl_retrieval.py
+++ b/mteb/tasks/retrieval/nld/scidocsnl_retrieval.py
@@ -1,33 +1,26 @@
 from mteb.abstasks.retrieval import AbsTaskRetrieval
 from mteb.abstasks.task_metadata import TaskMetadata
 
-[… 20 removed lines (old 4–23) not captured in this extract …]
-        domains=["Academic", "Written", "Non-fiction"],
-        task_subtypes=[],
-        license="cc-by-sa-4.0",
-        annotations_creators="derived",
-        dialect=[],
-        sample_creation="machine-translated and verified",  # manually checked a small subset
-        bibtex_citation=r"""
+_scidocsnl_metadata = dict(
+    dataset={
+        "path": "clips/beir-nl-scidocs",
+        "revision": "4e018aa220029f9d1bd5a31de3650e322e32ea38",
+    },
+    reference="https://huggingface.co/datasets/clips/beir-nl-scidocs",
+    type="Retrieval",
+    category="t2t",
+    modalities=["text"],
+    eval_splits=["test"],
+    eval_langs=["nld-Latn"],
+    main_score="ndcg_at_10",
+    date=("2020-05-01", "2020-05-01"),  # best guess: based on submission date
+    domains=["Academic", "Written", "Non-fiction"],
+    task_subtypes=[],
+    license="cc-by-sa-4.0",
+    annotations_creators="derived",
+    dialect=[],
+    sample_creation="machine-translated and verified",  # manually checked a small subset
+    bibtex_citation=r"""
 @misc{banar2024beirnlzeroshotinformationretrieval,
   archiveprefix = {arXiv},
   author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans},
@@ -38,5 +31,29 @@ class SCIDOCSNL(AbsTaskRetrieval):
   year = {2024},
 }
 """,
+)
+
+
+class SCIDOCSNL(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="SCIDOCS-NL",
+        description="SciDocs, a new evaluation benchmark consisting of seven document-level tasks ranging from "
+        "citation prediction, to document classification and recommendation. SciDocs-NL is a Dutch "
+        "translation.",
         adapted_from=["SCIDOCS"],
+        **_scidocsnl_metadata,
+    )
+
+
+class SCIDOCSNLv2(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="SCIDOCS-NL.v2",
+        description="SciDocs, a new evaluation benchmark consisting of seven document-level tasks ranging from "
+        "citation prediction, to document classification and recommendation. SciDocs-NL is a Dutch "
+        "translation. This version adds a Dutch prompt to the dataset.",
+        adapted_from=["SCIDOCS-NL"],
+        **_scidocsnl_metadata,
+        prompt={
+            "query": "Gegeven de titel van een wetenschappelijk artikel, haal de abstracts op van artikelen die door het gegeven artikel worden geciteerd"
+        },
     )
--- a/mteb/tasks/retrieval/nob/norquad.py
+++ b/mteb/tasks/retrieval/nob/norquad.py
@@ -54,7 +54,7 @@ Fishel, Mark},
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return
-        self.dataset = datasets.load_dataset(**self.metadata.dataset)
+        self.dataset = datasets.load_dataset(**self.metadata.dataset)
         self.dataset_transform()
         self.data_loaded = True
 
@@ -71,7 +71,7 @@ Fishel, Mark},
         text2id = {}
 
         for split in self.dataset:
-            ds: datasets.Dataset = self.dataset[split]
+            ds: datasets.Dataset = self.dataset[split]
             ds = ds.shuffle(seed=42)
             max_samples = min(1024, len(ds))
             ds = ds.select(
--- a/mteb/tasks/retrieval/nob/snl_retrieval.py
+++ b/mteb/tasks/retrieval/nob/snl_retrieval.py
@@ -41,7 +41,7 @@ class SNLRetrieval(AbsTaskRetrieval):
         """Load dataset from HuggingFace hub"""
         if self.data_loaded:
             return
-        self.dataset = datasets.load_dataset(**self.metadata.dataset)
+        self.dataset = datasets.load_dataset(**self.metadata.dataset)
         self.dataset_transform()
         self.data_loaded = True
 
@@ -58,7 +58,7 @@ class SNLRetrieval(AbsTaskRetrieval):
         text2id = {}
 
         for split in self.dataset:
-            ds: datasets.Dataset = self.dataset[split]
+            ds: datasets.Dataset = self.dataset[split]
             ds = ds.shuffle(seed=42)
 
             self.queries[split] = {}
--- a/mteb/tasks/retrieval/slk/slovak_sum_retrieval.py
+++ b/mteb/tasks/retrieval/slk/slovak_sum_retrieval.py
@@ -7,13 +7,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class SlovakSumRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="SlovakSumRetrieval",
-        description="""
-        SlovakSum, a Slovak news summarization dataset consisting of over 200 thousand
-        news articles with titles and short abstracts obtained from multiple Slovak newspapers.
-
-        Originally intended as a summarization task, but since no human annotations were provided
-        here reformulated to a retrieval task.
-        """,
+        description="SlovakSum, a Slovak news summarization dataset consisting of over 200 thousand news articles with titles and short abstracts obtained from multiple Slovak newspapers. Originally intended as a summarization task, but since no human annotations were provided here reformulated to a retrieval task.",
         reference="https://huggingface.co/datasets/NaiveNeuron/slovaksum",
         dataset={
             "path": "NaiveNeuron/slovaksum",
--- a/mteb/tasks/retrieval/tur/tur_hist_quad.py
+++ b/mteb/tasks/retrieval/tur/tur_hist_quad.py
@@ -59,7 +59,7 @@ class TurHistQuadRetrieval(AbsTaskRetrieval):
         text2id = {}
 
         for split in self.metadata.eval_splits:
-            ds: datasets.Dataset = self.dataset[split]
+            ds: datasets.Dataset = self.dataset[split]
             ds = ds.shuffle(seed=42)
             max_samples = min(1024, len(ds))
             ds = ds.select(
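The NorQuAD, SNL, and TurHistQuad hunks above all pass through the same deterministic down-sampling idiom: shuffle with a fixed seed, then `select` at most 1024 rows. A standalone sketch of that idiom, assuming only the Hugging Face `datasets` library (the helper name `downsample` is ours, not mteb's):

```python
import datasets

def downsample(ds: datasets.Dataset, max_samples: int = 1024, seed: int = 42) -> datasets.Dataset:
    # A fixed seed keeps the sampled subset identical across runs,
    # which is what makes the benchmark scores reproducible.
    ds = ds.shuffle(seed=seed)
    # Cap at max_samples without overrunning splits smaller than the cap.
    return ds.select(range(min(max_samples, len(ds))))
```
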
--- a/mteb/tasks/retrieval/vie/__init__.py
+++ b/mteb/tasks/retrieval/vie/__init__.py
@@ -1,5 +1,5 @@
 from .argu_ana_vn_retrieval import ArguAnaVN
-from .climate_fevervn_retrieval import ClimateFEVERVN
+from .climate_fevervn_retrieval import ClimateFEVERVN, NanoClimateFEVERVN
 from .cqa_dupstack_android_vn_retrieval import CQADupstackAndroidVN
 from .cqa_dupstack_gis_vn_retrieval import CQADupstackGisVN
 from .cqa_dupstack_mathematica_vn_retrieval import CQADupstackMathematicaVN
@@ -10,19 +10,20 @@ from .cqa_dupstack_tex_vn_retrieval import CQADupstackTexVN
 from .cqa_dupstack_unix_vn_retrieval import CQADupstackUnixVN
 from .cqa_dupstack_webmasters_vn_retrieval import CQADupstackWebmastersVN
 from .cqa_dupstack_wordpress_vn_retrieval import CQADupstackWordpressVN
-from .db_pedia_vn_retrieval import DBPediaVN
-from .fevervn_retrieval import FEVERVN
+from .db_pedia_vn_retrieval import DBPediaVN, NanoDBPediaVN
+from .fevervn_retrieval import FEVERVN, NanoFEVERVN
 from .fi_qa2018_vn_retrieval import FiQA2018VN
 from .green_node_table_markdown_retrieval import GreenNodeTableMarkdownRetrieval
-from .hotpot_qavn_retrieval import HotpotQAVN
-from .msmarcovn_retrieval import MSMARCOVN
+from .hotpot_qavn_retrieval import HotpotQAVN, NanoHotpotQAVN
+from .msmarcovn_retrieval import MSMARCOVN, NanoMSMARCOVN
 from .nf_corpus_vn_retrieval import NFCorpusVN
-from .nqvn_retrieval import NQVN
+from .nqvn_retrieval import NQVN, NanoNQVN
 from .quora_vn_retrieval import QuoraVN
 from .sci_fact_vn_retrieval import SciFactVN
 from .scidocsvn_retrieval import SCIDOCSVN
 from .touche2020_vn_retrieval import Touche2020VN
 from .treccovidvn_retrieval import TRECCOVIDVN
+from .tvpl_retrieval import TVPLRetrieval
 from .vie_qu_ad_retrieval import VieQuADRetrieval
 from .zac_legal_text_retrieval import ZacLegalTextRetrieval
 
@@ -49,8 +50,15 @@ __all__ = [
     "GreenNodeTableMarkdownRetrieval",
     "HotpotQAVN",
     "NFCorpusVN",
+    "NanoClimateFEVERVN",
+    "NanoDBPediaVN",
+    "NanoFEVERVN",
+    "NanoHotpotQAVN",
+    "NanoMSMARCOVN",
+    "NanoNQVN",
     "QuoraVN",
     "SciFactVN",
+    "TVPLRetrieval",
     "Touche2020VN",
     "VieQuADRetrieval",
     "ZacLegalTextRetrieval",
--- a/mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py
+++ b/mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class ArguAnaVN(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="ArguAna-VN",
-        description="""
-        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-        - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-        - Applies advanced embedding models to filter the translations.
-        - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from NFCorpus: A Full-Text Learning to Rank Dataset for Medical Information Retrieval The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="http://argumentation.bplaced.net/arguana/data",
         dataset={
             "path": "GreenNode/arguana-vn",
--- a/mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py
+++ b/mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class ClimateFEVERVN(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="ClimateFEVER-VN",
-        description="""
-        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-        - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-        - Applies advanced embedding models to filter the translations.
-        - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from CLIMATE-FEVER is a dataset adopting the FEVER methodology that consists of 1,535 real-world claims regarding climate-change. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html",
         dataset={
             "path": "GreenNode/climate-fever-vn",
@@ -40,3 +36,42 @@ class ClimateFEVERVN(AbsTaskRetrieval):
 """,
         adapted_from=["ClimateFEVER"],
     )
+
+
+class NanoClimateFEVERVN(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="NanoClimateFEVER-VN",
+        description="NanoClimateFEVERVN is a small version of A translated dataset from CLIMATE-FEVER is a dataset adopting the FEVER methodology that consists of 1,535 real-world claims regarding climate-change. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
+        reference="https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html",
+        dataset={
+            "path": "GreenNode/nano-climate-fever-vn",
+            "revision": "1852e852f07403d4529a8520d52b91ff6d57869b",
+        },
+        type="Retrieval",
+        category="t2t",
+        eval_splits=["test"],
+        eval_langs=["vie-Latn"],
+        main_score="ndcg_at_10",
+        date=("2025-07-29", "2025-07-30"),
+        license="cc-by-sa-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="machine-translated and LM verified",
+        domains=["Encyclopaedic", "Written"],
+        task_subtypes=["Claim verification"],
+        bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+  archiveprefix = {arXiv},
+  author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+  eprint = {2507.21500},
+  primaryclass = {cs.CL},
+  title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+  url = {https://arxiv.org/abs/2507.21500},
+  year = {2025},
+}
+""",
+        prompt={
+            "query": "Given a claim about climate change, retrieve documents that support or refute the claim"
+        },
+        adapted_from=["ClimateFEVER-VN"],
+    )
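The `NanoClimateFEVER-VN` task added above (and its Nano siblings registered in `vie/__init__.py`) become addressable by the `name` field of their `TaskMetadata`. A hedged usage sketch, assuming `mteb.get_tasks` (backed by the `mteb/get_tasks.py` module in the file list) resolves registered task names:

```python
import mteb

# Assumption: get_tasks accepts a list of registered task names;
# "NanoClimateFEVER-VN" is the name declared in the hunk above.
tasks = mteb.get_tasks(tasks=["NanoClimateFEVER-VN"])
print(tasks[0].metadata.main_score)  # expected: ndcg_at_10
```
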
--- a/mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py
+++ b/mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class CQADupstackAndroidVN(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="CQADupstackAndroid-VN",
-        description="""
-        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-        - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-        - Applies advanced embedding models to filter the translations.
-        - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
         dataset={
             "path": "GreenNode/cqadupstack-android-vn",
--- a/mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py
+++ b/mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class CQADupstackGisVN(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="CQADupstackGis-VN",
-        description="""
-        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-        - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-        - Applies advanced embedding models to filter the translations.
-        - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
         dataset={
             "path": "GreenNode/cqadupstack-gis-vn",
--- a/mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py
+++ b/mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class CQADupstackMathematicaVN(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="CQADupstackMathematica-VN",
-        description="""
-        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-        - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-        - Applies advanced embedding models to filter the translations.
-        - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
         dataset={
             "path": "GreenNode/cqadupstack-mathematica-vn",
--- a/mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py
+++ b/mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class CQADupstackPhysicsVN(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="CQADupstackPhysics-VN",
-        description="""
-        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-        - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-        - Applies advanced embedding models to filter the translations.
-        - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
         dataset={
             "path": "GreenNode/cqadupstack-physics-vn",
--- a/mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py
+++ b/mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class CQADupstackProgrammersRetrievalVN(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="CQADupstackProgrammers-VN",
-        description="""
-        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-        - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-        - Applies advanced embedding models to filter the translations.
-        - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
         dataset={
             "path": "GreenNode/cqadupstack-programmers-vn",
--- a/mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py
+++ b/mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class CQADupstackStatsVN(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="CQADupstackStats-VN",
-        description="""
-        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-        - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-        - Applies advanced embedding models to filter the translations.
-        - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
         dataset={
             "path": "GreenNode/cqadupstack-stats-vn",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class CQADupstackTexVN(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="CQADupstackTex-VN",
-        description="
-    The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-    - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-    - Applies advanced embedding models to filter the translations.
-    - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
         dataset={
             "path": "GreenNode/cqadupstack-tex-vn",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class CQADupstackUnixVN(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="CQADupstackUnix-VN",
-        description="
-    The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-    - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-    - Applies advanced embedding models to filter the translations.
-    - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
         dataset={
             "path": "GreenNode/cqadupstack-unix-vn",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class CQADupstackWebmastersVN(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="CQADupstackWebmasters-VN",
-        description="
-    The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-    - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-    - Applies advanced embedding models to filter the translations.
-    - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
         dataset={
             "path": "GreenNode/cqadupstack-webmasters-vn",
@@ -9,11 +9,7 @@ class CQADupstackWordpressVN(AbsTaskRetrieval):
             "path": "GreenNode/cqadupstack-wordpress-vn",
             "revision": "2230f80e1baf42aa005731ca86577621c566fcd7",
         },
-        description="
-    The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-    - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-    - Applies advanced embedding models to filter the translations.
-    - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
         type="Retrieval",
         category="t2t",
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class DBPediaVN(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="DBPedia-VN",
-        description="
-    The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-    - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-    - Applies advanced embedding models to filter the translations.
-    - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from DBpedia-Entity is a standard test collection for entity search over the DBpedia knowledge base The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="https://github.com/iai-group/DBpedia-Entity/",
         dataset={
             "path": "GreenNode/dbpedia-vn",
@@ -40,3 +36,42 @@ class DBPediaVN(AbsTaskRetrieval):
         """,
         adapted_from=["DBPedia"],
     )
+
+
+class NanoDBPediaVN(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="NanoDBPedia-VN",
+        description="NanoDBPediaVN is a small version of A translated dataset from DBpedia-Entity is a standard test collection for entity search over the DBpedia knowledge base The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
+        reference="https://github.com/iai-group/DBpedia-Entity/",
+        dataset={
+            "path": "GreenNode/nano-dbpedia-vn",
+            "revision": "bbc3259bc63bf1e250d7034024092cc3230d5850",
+        },
+        type="Retrieval",
+        category="t2t",
+        eval_splits=["test"],
+        eval_langs=["vie-Latn"],
+        main_score="ndcg_at_10",
+        date=("2025-07-29", "2025-07-30"),
+        license="cc-by-sa-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="machine-translated and LM verified",
+        domains=["Written", "Encyclopaedic"],
+        task_subtypes=[],
+        bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+  archiveprefix = {arXiv},
+  author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+  eprint = {2507.21500},
+  primaryclass = {cs.CL},
+  title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+  url = {https://arxiv.org/abs/2507.21500},
+  year = {2025},
+}
+""",
+        prompt={
+            "query": "Given a query, retrieve relevant entity descriptions from DBPedia"
+        },
+        adapted_from=["DBPedia-VN"],
+    )
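NanoDBPediaVN registers as an ordinary retrieval task, so nothing special is needed to run it. A minimal sketch, assuming the v2-style `mteb.evaluate` entry point; the model name is only an illustrative placeholder, not something this diff prescribes:

    import mteb

    # Select the newly added nano task by the name declared in its metadata.
    tasks = mteb.get_tasks(tasks=["NanoDBPedia-VN"])

    # Placeholder model; any embedding model mteb can load would do here.
    model = mteb.get_model("intfloat/multilingual-e5-small")

    # Resulting scores include ndcg_at_10, the main_score declared above.
    results = mteb.evaluate(model, tasks=tasks)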
@@ -9,13 +9,7 @@ class FEVERVN(AbsTaskRetrieval):
             "path": "GreenNode/fever-vn",
             "revision": "a543dd8b98aed3603110c01d26db05ba39b87d49",
         },
-        description="
-    extracted from Wikipedia and subsequently verified without knowledge of the sentence they were
-    derived from.
-    The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-    - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-    - Applies advanced embedding models to filter the translations.
-    - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from FEVER (Fact Extraction and VERification) consists of 185,445 claims generated by altering sentences extracted from Wikipedia and subsequently verified without knowledge of the sentence they were derived from. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="https://fever.ai/",
         type="Retrieval",
         category="t2t",
@@ -42,3 +36,42 @@ class FEVERVN(AbsTaskRetrieval):
         """,
         adapted_from=["FEVER"],
     )
+
+
+class NanoFEVERVN(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="NanoFEVER-VN",
+        dataset={
+            "path": "GreenNode/nano-fever-vn",
+            "revision": "457ca6b058ed19b28f2359e2d816d7527af6bef8",
+        },
+        description="NanoFEVERVN is a small version of A translated dataset from FEVER (Fact Extraction and VERification) consists of 185,445 claims generated by altering sentences extracted from Wikipedia and subsequently verified without knowledge of the sentence they were derived from. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
+        reference="https://fever.ai/",
+        type="Retrieval",
+        category="t2t",
+        eval_splits=["test"],
+        eval_langs=["vie-Latn"],
+        main_score="ndcg_at_10",
+        date=("2025-07-29", "2025-07-30"),
+        license="cc-by-sa-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="machine-translated and LM verified",
+        domains=["Encyclopaedic", "Written"],
+        task_subtypes=["Claim verification"],
+        bibtex_citation=r"""
+@misc{pham2025vnmtebvietnamesemassivetext,
+  archiveprefix = {arXiv},
+  author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
+  eprint = {2507.21500},
+  primaryclass = {cs.CL},
+  title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
+  url = {https://arxiv.org/abs/2507.21500},
+  year = {2025},
+}
+""",
+        prompt={
+            "query": "Given a claim, retrieve documents that support or refute the claim"
+        },
+        adapted_from=["FEVER-VN"],
+    )
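Because both Nano variants declare `eval_langs=["vie-Latn"]`, they also surface through language-filtered task discovery. A short sketch, assuming `get_tasks` keeps its usual `languages`/`task_types` filters:

    import mteb

    # List Vietnamese retrieval tasks; the Nano variants added in this diff
    # appear alongside the full-size VN tasks edited above.
    vn_retrieval = mteb.get_tasks(languages=["vie"], task_types=["Retrieval"])
    print(sorted(t.metadata.name for t in vn_retrieval))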