mteb-2.1.7-py3-none-any.whl → mteb-2.1.8-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/_create_dataloaders.py +6 -3
- mteb/_evaluators/any_sts_evaluator.py +14 -12
- mteb/_evaluators/clustering_evaluator.py +1 -1
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +2 -2
- mteb/_evaluators/pair_classification_evaluator.py +3 -1
- mteb/_evaluators/sklearn_evaluator.py +15 -28
- mteb/_evaluators/text/bitext_mining_evaluator.py +4 -1
- mteb/_evaluators/text/summarization_evaluator.py +4 -2
- mteb/_evaluators/zeroshot_classification_evaluator.py +2 -2
- mteb/abstasks/clustering.py +1 -1
- mteb/abstasks/multilabel_classification.py +2 -2
- mteb/abstasks/task_metadata.py +1 -0
- mteb/benchmarks/benchmark.py +9 -0
- mteb/benchmarks/benchmarks/__init__.py +2 -0
- mteb/benchmarks/benchmarks/benchmarks.py +40 -1
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
- mteb/models/cache_wrappers/cache_wrapper.py +1 -1
- mteb/models/model_implementations/align_models.py +6 -0
- mteb/models/model_implementations/ara_models.py +7 -0
- mteb/models/model_implementations/blip2_models.py +9 -0
- mteb/models/model_implementations/blip_models.py +19 -0
- mteb/models/model_implementations/cadet_models.py +8 -0
- mteb/models/model_implementations/cde_models.py +12 -0
- mteb/models/model_implementations/codefuse_models.py +15 -0
- mteb/models/model_implementations/codesage_models.py +12 -0
- mteb/models/model_implementations/misc_models.py +6 -0
- mteb/models/model_implementations/moco_models.py +9 -0
- mteb/models/model_implementations/openclip_models.py +16 -0
- mteb/models/model_implementations/piccolo_models.py +6 -0
- mteb/models/model_implementations/rasgaard_models.py +7 -1
- mteb/models/model_implementations/tarka_models.py +317 -0
- mteb/models/search_wrappers.py +5 -5
- mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +1 -5
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
- mteb/tasks/classification/ara/ajgt.py +1 -2
- mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
- mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
- mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
- mteb/tasks/classification/dan/dk_hate_classification.py +1 -2
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
- mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
- mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
- mteb/tasks/classification/eng/arxiv_classification.py +1 -2
- mteb/tasks/classification/eng/banking77_classification.py +1 -2
- mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
- mteb/tasks/classification/eng/emotion_classification.py +1 -2
- mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
- mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
- mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
- mteb/tasks/classification/eng/imdb_classification.py +1 -2
- mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
- mteb/tasks/classification/eng/news_classification.py +1 -2
- mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
- mteb/tasks/classification/eng/patent_classification.py +1 -2
- mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
- mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
- mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
- mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
- mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
- mteb/tasks/classification/eng/ucf101_classification.py +1 -5
- mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
- mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
- mteb/tasks/classification/est/estonian_valence.py +1 -2
- mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
- mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
- mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
- mteb/tasks/classification/fra/french_book_reviews.py +1 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
- mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +1 -2
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
- mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
- mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
- mteb/tasks/classification/jpn/wrime_classification.py +1 -2
- mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
- mteb/tasks/classification/kor/klue_tc.py +1 -2
- mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +1 -2
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
- mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
- mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
- mteb/tasks/classification/multilingual/scala_classification.py +1 -2
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
- mteb/tasks/classification/mya/myanmar_news.py +1 -2
- mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
- mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +1 -3
- mteb/tasks/classification/nob/no_rec_classification.py +1 -2
- mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
- mteb/tasks/classification/ory/odia_news_classification.py +1 -2
- mteb/tasks/classification/pol/polish_classification.py +3 -6
- mteb/tasks/classification/ron/moroco.py +1 -2
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
- mteb/tasks/classification/rus/georeview_classification.py +1 -2
- mteb/tasks/classification/rus/headline_classification.py +1 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
- mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
- mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
- mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
- mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
- mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
- mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
- mteb/tasks/classification/swe/dalaj_classification.py +1 -2
- mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
- mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
- mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
- mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
- mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
- mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
- mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
- mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
- mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
- mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
- mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
- mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
- mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
- mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
- mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
- mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
- mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
- mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
- mteb/tasks/classification/zho/cmteb_classification.py +5 -10
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
- mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
- mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
- mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
- mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
- mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
- mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
- mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
- mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
- mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
- mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
- mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
- mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
- mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
- mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
- mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
- mteb/tasks/retrieval/multilingual/__init__.py +22 -0
- mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
- mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
- mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +399 -0
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
- mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +1 -7
- mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
- mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
- mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
- mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
- mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
- mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
- mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
- {mteb-2.1.7.dist-info → mteb-2.1.8.dist-info}/METADATA +1 -1
- {mteb-2.1.7.dist-info → mteb-2.1.8.dist-info}/RECORD +239 -228
- {mteb-2.1.7.dist-info → mteb-2.1.8.dist-info}/WHEEL +0 -0
- {mteb-2.1.7.dist-info → mteb-2.1.8.dist-info}/entry_points.txt +0 -0
- {mteb-2.1.7.dist-info → mteb-2.1.8.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.1.7.dist-info → mteb-2.1.8.dist-info}/top_level.txt +0 -0
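The headline change in 2.1.8 is the new ViDoRe v3 document-understanding suite: ten `Vidore3*Retrieval` tasks plus their descriptive stats, a benchmark entry, and a new model implementation file (`tarka_models.py`). As a minimal sketch of running one of the new tasks — assuming the v2-style `get_model`/`get_tasks`/`evaluate` entry points from the mteb docs; the model name below is a hypothetical placeholder, not something this release pins:

```python
import mteb

# ViDoRe v3 tasks are text-to-image ("t2i"), so the model must support the
# image modality; "vidore/colpali-v1.3" is a hypothetical choice, not taken
# from this diff.
model = mteb.get_model("vidore/colpali-v1.3")
tasks = mteb.get_tasks(tasks=["Vidore3FinanceEnRetrieval"])

# Assumes the mteb v2 evaluate() entry point; each task's main score is
# ndcg_at_10 per the task metadata in the diff below.
results = mteb.evaluate(model, tasks=tasks)
```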
mteb/tasks/reranking/vie/sci_docs_reranking_vn.py

@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class SciDocsRerankingVN(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="SciDocsRR-VN",
-        description="""
-        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-        - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-        - Applies advanced embedding models to filter the translations.
-        - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from Ranking of related scientific papers based on their title. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="https://allenai.org/data/scidocs",
         dataset={
             "path": "mteb/SciDocsRR-VN",

mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py

@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class StackOverflowDupQuestionsVN(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="StackOverflowDupQuestions-VN",
-        description="""
-        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-        - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-        - Applies advanced embedding models to filter the translations.
-        - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from Stack Overflow Duplicate Questions Task for questions with the tags Java, JavaScript and Python The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="https://www.microsoft.com/en-us/research/uploads/prod/2019/03/nl4se18LinkSO.pdf",
         dataset={
             "path": "mteb/StackOverflowDupQuestions-VN",

mteb/tasks/retrieval/eng/lit_search_retrieval.py

@@ -7,14 +7,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class LitSearchRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="LitSearchRetrieval",
-        description="""
-        The dataset contains the query set and retrieval corpus for the paper LitSearch: A Retrieval Benchmark for
-        Scientific Literature Search. It introduces LitSearch, a retrieval benchmark comprising 597 realistic literature
-        search queries about recent ML and NLP papers. LitSearch is constructed using a combination of (1) questions
-        generated by GPT-4 based on paragraphs containing inline citations from research papers and (2) questions about
-        recently published papers, manually written by their authors. All LitSearch questions were manually examined or
-        edited by experts to ensure high quality.
-        """,
+        description="The dataset contains the query set and retrieval corpus for the paper LitSearch: A Retrieval Benchmark for Scientific Literature Search. It introduces LitSearch, a retrieval benchmark comprising 597 realistic literature search queries about recent ML and NLP papers. LitSearch is constructed using a combination of (1) questions generated by GPT-4 based on paragraphs containing inline citations from research papers and (2) questions about recently published papers, manually written by their authors. All LitSearch questions were manually examined or edited by experts to ensure high quality.",
         reference="https://github.com/princeton-nlp/LitSearch",
         dataset={
             "path": "princeton-nlp/LitSearch",

mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py

@@ -9,10 +9,7 @@ class JaCWIRRetrieval(AbsTaskRetrieval):
 
     metadata = TaskMetadata(
         name="JaCWIRRetrieval",
-        description="""
-        5000 question texts and approximately 500k web page titles and web page introductions or summaries
-        (meta descriptions, etc.). The question texts are created based on one of the 500k web pages,
-        and that data is used as a positive example for the question text.""",
+        description="JaCWIR is a small-scale Japanese information retrieval evaluation dataset consisting of 5000 question texts and approximately 500k web page titles and web page introductions or summaries (meta descriptions, etc.). The question texts are created based on one of the 500k web pages, and that data is used as a positive example for the question text.",
         reference="https://huggingface.co/datasets/hotchpotch/JaCWIR",
         dataset={
             "path": "mteb/JaCWIRRetrieval",

mteb/tasks/retrieval/multilingual/__init__.py

@@ -81,6 +81,18 @@ from .vidore2_bench_retrieval import (
     Vidore2ESGReportsHLRetrieval,
     Vidore2ESGReportsRetrieval,
 )
+from .vidore3_bench_retrieval import (
+    Vidore3ComputerScienceRetrieval,
+    Vidore3EnergyRetrieval,
+    Vidore3FinanceEnRetrieval,
+    Vidore3FinanceFrRetrieval,
+    Vidore3HrRetrieval,
+    Vidore3IndustrialRetrieval,
+    Vidore3NuclearRetrieval,
+    Vidore3PharmaceuticalsRetrieval,
+    Vidore3PhysicsRetrieval,
+    Vidore3TelecomRetrieval,
+)
 from .web_faq_retrieval import WebFAQRetrieval
 from .wikipedia_retrieval_multilingual import WikipediaRetrievalMultilingual
 from .wit_t2i_retrieval import WITT2IRetrieval

@@ -161,6 +173,16 @@ __all__ = [
     "Vidore2ESGReportsHLRetrieval",
     "Vidore2ESGReportsRetrieval",
     "Vidore2EconomicsReportsRetrieval",
+    "Vidore3ComputerScienceRetrieval",
+    "Vidore3EnergyRetrieval",
+    "Vidore3FinanceEnRetrieval",
+    "Vidore3FinanceFrRetrieval",
+    "Vidore3HrRetrieval",
+    "Vidore3IndustrialRetrieval",
+    "Vidore3NuclearRetrieval",
+    "Vidore3PharmaceuticalsRetrieval",
+    "Vidore3PhysicsRetrieval",
+    "Vidore3TelecomRetrieval",
     "WITT2IRetrieval",
     "WebFAQRetrieval",
     "WikipediaRetrievalMultilingual",

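Importing the classes and listing them in `__all__` is what makes the new tasks discoverable by name in the task registry. A quick smoke test, assuming `mteb.get_task` resolves registered names as described in the v2 docs:

```python
import mteb

# Hypothetical check: name resolution should succeed once the class is
# imported in mteb.tasks.retrieval.multilingual and listed in __all__.
task = mteb.get_task("Vidore3TelecomRetrieval")
assert task.metadata.type == "DocumentUnderstanding"  # per the diff below
assert task.metadata.main_score == "ndcg_at_10"
```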
mteb/tasks/retrieval/multilingual/mkqa_retrieval.py

@@ -34,8 +34,7 @@ _EVAL_LANGS = {
 class MKQARetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="MKQARetrieval",
-        description="""
-        For each query we collect new passage-independent answers. These queries and answers are then human translated into 25 Non-English languages.""",
+        description="Multilingual Knowledge Questions & Answers (MKQA)contains 10,000 queries sampled from the Google Natural Questions dataset. For each query we collect new passage-independent answers. These queries and answers are then human translated into 25 Non-English languages.",
         reference="https://github.com/apple/ml-mkqa",
         dataset={
             "path": "mteb/MKQARetrieval",

mteb/tasks/retrieval/multilingual/mlqa_retrieval.py

@@ -75,10 +75,7 @@ _EVAL_LANGS = extend_lang_pairs()
 class MLQARetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="MLQARetrieval",
-        description="""
-        MLQA consists of over 5K extractive QA instances (12K in English) in SQuAD format in seven languages - English, Arabic,
-        German, Spanish, Hindi, Vietnamese and Simplified Chinese. MLQA is highly parallel, with QA instances parallel between
-        4 different languages on average.""",
+        description="MLQA (MultiLingual Question Answering) is a benchmark dataset for evaluating cross-lingual question answering performance. MLQA consists of over 5K extractive QA instances (12K in English) in SQuAD format in seven languages - English, Arabic, German, Spanish, Hindi, Vietnamese and Simplified Chinese. MLQA is highly parallel, with QA instances parallel between 4 different languages on average.",
         reference="https://huggingface.co/datasets/mlqa",
         dataset={
             "path": "mteb/MLQARetrieval",

mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py

@@ -21,8 +21,7 @@ _LANGUAGES = {
 class MultiLongDocRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="MultiLongDocRetrieval",
-        description="""
-        It is constructed by sampling lengthy articles from Wikipedia, Wudao and mC4 datasets and randomly choose paragraphs from them. Then we use GPT-3.5 to generate questions based on these paragraphs. The generated question and the sampled article constitute a new text pair to the dataset.""",
+        description="Multi Long Doc Retrieval (MLDR) 'is curated by the multilingual articles from Wikipedia, Wudao and mC4 (see Table 7), and NarrativeQA (Kočiský et al., 2018; Günther et al., 2023), which is only for English.' (Chen et al., 2024). It is constructed by sampling lengthy articles from Wikipedia, Wudao and mC4 datasets and randomly choose paragraphs from them. Then we use GPT-3.5 to generate questions based on these paragraphs. The generated question and the sampled article constitute a new text pair to the dataset.",
         reference="https://arxiv.org/abs/2402.03216",  # also: https://huggingface.co/datasets/Shitao/MLDR
         dataset={
             "path": "mteb/MultiLongDocRetrieval",

mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py

@@ -68,11 +68,7 @@ class RuSciBenchCiteRetrieval(AbsTaskRetrieval):
             "path": "mlsa-iai-msu-lab/ru_sci_bench_cite_retrieval",
             "revision": "6cb447d02f41b8b775d5d9df7faf472f44d2f1db",
         },
-        description="""
-        Russia's largest electronic library of scientific publications. Given a query paper (title and abstract),
-        the goal is to retrieve papers that are directly cited by it from a larger corpus of papers.
-        The dataset for this task consists of 3,000 query papers, 15,000 relevant (cited) papers,
-        and 75,000 irrelevant papers. The task is available for both Russian and English scientific texts.""",
+        description="This task is focused on Direct Citation Prediction for scientific papers from eLibrary, Russia's largest electronic library of scientific publications. Given a query paper (title and abstract), the goal is to retrieve papers that are directly cited by it from a larger corpus of papers. The dataset for this task consists of 3,000 query papers, 15,000 relevant (cited) papers, and 75,000 irrelevant papers. The task is available for both Russian and English scientific texts.",
         reference="https://github.com/mlsa-iai-msu-lab/ru_sci_bench_mteb",
         type="Retrieval",
         category="t2t",

@@ -130,13 +126,7 @@ class RuSciBenchCociteRetrieval(AbsTaskRetrieval):
             "path": "mlsa-iai-msu-lab/ru_sci_bench_cocite_retrieval",
             "revision": "a5da47a245275669d2b6ddf8f96c5338dd2428b4",
         },
-        description="""
-        Russia's largest electronic library of scientific publications. Given a query paper (title and abstract),
-        the goal is to retrieve other papers that are co-cited with it. Two papers are considered co-cited
-        if they are both cited by at least 5 of the same other papers. Similar to the Direct Citation task,
-        this task employs a retrieval setup: for a given query paper, all other papers in the corpus that
-        are not co-cited with it are considered negative examples. The task is available for both Russian
-        and English scientific texts.""",
+        description="This task focuses on Co-citation Prediction for scientific papers from eLibrary, Russia's largest electronic library of scientific publications. Given a query paper (title and abstract), the goal is to retrieve other papers that are co-cited with it. Two papers are considered co-cited if they are both cited by at least 5 of the same other papers. Similar to the Direct Citation task, this task employs a retrieval setup: for a given query paper, all other papers in the corpus that are not co-cited with it are considered negative examples. The task is available for both Russian and English scientific texts.",
         reference="https://github.com/mlsa-iai-msu-lab/ru_sci_bench_mteb",
         type="Retrieval",
         category="t2t",

mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py (new file)

@@ -0,0 +1,399 @@
+from mteb.abstasks.retrieval import AbsTaskRetrieval
+from mteb.abstasks.task_metadata import TaskMetadata
+
+_LANGS = {
+    "french": ["fra-Latn"],
+    "spanish": ["spa-Latn"],
+    "english": ["eng-Latn"],
+    "german": ["deu-Latn"],
+    "italian": ["ita-Latn"],
+    "portuguese": ["por-Latn"],
+}
+
+
+class Vidore3FinanceEnRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="Vidore3FinanceEnRetrieval",
+        description="Retrieve associated pages according to questions. This task, Finance - EN, is a corpus of reports from american banking companies, intended for long-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
+        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+        dataset={
+            "path": "vidore/vidore_v3_finance_en_mteb_format",
+            "revision": "fa78cb14152b3dde8c5defdc4e3ddf50de69dfeb",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=_LANGS,
+        main_score="ndcg_at_10",
+        date=("2025-10-01", "2025-11-01"),
+        domains=["Financial"],
+        task_subtypes=["Image Text Retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="created and machine-translated",
+        bibtex_citation=r"""
+@misc{mace2025vidorev3,
+  author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
+  day = {5},
+  howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
+  journal = {Hugging Face Blog},
+  month = {November},
+  publisher = {Hugging Face},
+  title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
+  year = {2025},
+}
+""",
+        prompt={"query": "Find a screenshot that is relevant to the user's question."},
+    )
+
+
+class Vidore3FinanceFrRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="Vidore3FinanceFrRetrieval",
+        description="Retrieve associated pages according to questions. This task, Finance - FR, is a corpus of reports from french companies in the luxury domain, intended for long-document understanding tasks. Original queries were created in french, then translated to english, german, italian, portuguese and spanish.",
+        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+        dataset={
+            "path": "vidore/vidore_v3_finance_fr_mteb_format",
+            "revision": "8a2adfda85a7967c7252129703d9b3c7c9f038a9",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=_LANGS,
+        main_score="ndcg_at_10",
+        date=("2025-10-01", "2025-11-01"),
+        domains=["Financial"],
+        task_subtypes=["Image Text Retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        sample_creation="created and machine-translated",
+        bibtex_citation=r"""
+@misc{mace2025vidorev3,
+  author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
+  day = {5},
+  howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
+  journal = {Hugging Face Blog},
+  month = {November},
+  publisher = {Hugging Face},
+  title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
+  year = {2025},
+}
+""",
+        prompt={"query": "Find a screenshot that is relevant to the user's question."},
+        is_public=True,
+    )
+
+
+class Vidore3IndustrialRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="Vidore3IndustrialRetrieval",
+        description="Retrieve associated pages according to questions. This dataset, Industrial reports, is a corpus of technical documents on military aircraft (fueling, mechanics...), intended for complex-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
+        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+        dataset={
+            "path": "vidore/vidore_v3_industrial_mteb_format",
+            "revision": "f732b725cf4a70803210edfe265a04f8bd5328f6",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=_LANGS,
+        main_score="ndcg_at_10",
+        date=("2025-10-01", "2025-11-01"),
+        domains=["Engineering"],
+        task_subtypes=["Image Text Retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="created and machine-translated",
+        bibtex_citation=r"""
+@misc{mace2025vidorev3,
+  author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
+  day = {5},
+  howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
+  journal = {Hugging Face Blog},
+  month = {November},
+  publisher = {Hugging Face},
+  title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
+  year = {2025},
+}
+""",
+        prompt={"query": "Find a screenshot that is relevant to the user's question."},
+        is_public=True,
+    )
+
+
+class Vidore3PharmaceuticalsRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="Vidore3PharmaceuticalsRetrieval",
+        description="Retrieve associated pages according to questions. This dataset, Pharmaceutical, is a corpus of slides from the FDA, intended for long-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
+        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+        dataset={
+            "path": "vidore/vidore_v3_pharmaceuticals_mteb_format",
+            "revision": "237ed4f43c7fb3c4df07ec4e9dd0a4366be555b0",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=_LANGS,
+        main_score="ndcg_at_10",
+        date=("2025-10-01", "2025-11-01"),
+        domains=["Medical"],
+        task_subtypes=["Image Text Retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="created and machine-translated",
+        bibtex_citation=r"""
+@misc{mace2025vidorev3,
+  author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
+  day = {5},
+  howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
+  journal = {Hugging Face Blog},
+  month = {November},
+  publisher = {Hugging Face},
+  title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
+  year = {2025},
+}
+""",
+        prompt={"query": "Find a screenshot that is relevant to the user's question."},
+        is_public=True,
+    )
+
+
+class Vidore3ComputerScienceRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="Vidore3ComputerScienceRetrieval",
+        description="Retrieve associated pages according to questions. This dataset, Computer Science, is a corpus of textbooks from the openstacks website, intended for long-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
+        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+        dataset={
+            "path": "vidore/vidore_v3_computer_science_mteb_format",
+            "revision": "fb7fb69f81f7db62790f40494124b8ad22b424ab",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=_LANGS,
+        main_score="ndcg_at_10",
+        date=("2025-10-01", "2025-11-01"),
+        domains=["Engineering", "Programming"],
+        task_subtypes=["Image Text Retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="created and machine-translated",
+        bibtex_citation=r"""
+@misc{mace2025vidorev3,
+  author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
+  day = {5},
+  howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
+  journal = {Hugging Face Blog},
+  month = {November},
+  publisher = {Hugging Face},
+  title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
+  year = {2025},
+}
+""",
+        prompt={"query": "Find a screenshot that is relevant to the user's question."},
+        is_public=True,
+    )
+
+
+class Vidore3HrRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="Vidore3HrRetrieval",
+        description="Retrieve associated pages according to questions. This dataset, HR, is a corpus of reports released by the european union, intended for complex-document understanding tasks. Original queries were created in english, then translated to french, german, italian, portuguese and spanish.",
+        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+        dataset={
+            "path": "vidore/vidore_v3_hr_mteb_format",
+            "revision": "bc7d43d64815ed30f664168c8052106484aba7fd",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=_LANGS,
+        main_score="ndcg_at_10",
+        date=("2025-10-01", "2025-11-01"),
+        domains=["Social"],
+        task_subtypes=["Image Text Retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="created and machine-translated",
+        bibtex_citation=r"""
+@misc{mace2025vidorev3,
+  author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
+  day = {5},
+  howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
+  journal = {Hugging Face Blog},
+  month = {November},
+  publisher = {Hugging Face},
+  title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
+  year = {2025},
+}
+""",
+        prompt={"query": "Find a screenshot that is relevant to the user's question."},
+        is_public=True,
+    )
+
+
+class Vidore3EnergyRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="Vidore3EnergyRetrieval",
+        description="Retrieve associated pages according to questions. This dataset, Energy Fr, is a corpus of reports on energy supply in europe, intended for complex-document understanding tasks. Original queries were created in french, then translated to english, german, italian, portuguese and spanish.",
+        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+        dataset={
+            "path": "vidore/vidore_v3_energy_mteb_format",
+            "revision": "84fca99e5978604bae30f2436eacb6dbaa0532e9",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=_LANGS,
+        main_score="ndcg_at_10",
+        date=("2025-10-01", "2025-11-01"),
+        domains=["Engineering", "Chemistry", "Academic"],
+        task_subtypes=["Image Text Retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="created and machine-translated",
+        bibtex_citation=r"""
+@misc{mace2025vidorev3,
+  author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
+  day = {5},
+  howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
+  journal = {Hugging Face Blog},
+  month = {November},
+  publisher = {Hugging Face},
+  title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
+  year = {2025},
+}
+""",
+        prompt={"query": "Find a screenshot that is relevant to the user's question."},
+        is_public=True,
+    )
+
+
+class Vidore3PhysicsRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="Vidore3PhysicsRetrieval",
+        description="Retrieve associated pages according to questions. This dataset, Physics, is a corpus of course slides on french bachelor level physics lectures, intended for complex visual understanding tasks. Original queries were created in french, then translated to english, german, italian, portuguese and spanish.",
+        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+        dataset={
+            "path": "vidore/vidore_v3_physics_mteb_format",
+            "revision": "2c18ef90ab3ef93a9d86ecc6521cdae2a29f8300",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=_LANGS,
+        main_score="ndcg_at_10",
+        date=("2025-10-01", "2025-11-01"),
+        domains=["Engineering", "Academic"],
+        task_subtypes=["Image Text Retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="created and machine-translated",
+        bibtex_citation=r"""
+@misc{mace2025vidorev3,
+  author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
+  day = {5},
+  howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
+  journal = {Hugging Face Blog},
+  month = {November},
+  publisher = {Hugging Face},
+  title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
+  year = {2025},
+}
+""",
+        prompt={"query": "Find a screenshot that is relevant to the user's question."},
+        is_public=True,
+    )
+
+
+class Vidore3NuclearRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="Vidore3NuclearRetrieval",
+        description="Retrieve associated pages according to questions.",
+        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+        dataset={
+            "path": "mteb-private/Vidore3NuclearRetrieval",
+            "revision": "a463fc67fefc01152153101e88a32d5f9515e3e3",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=_LANGS,
+        main_score="ndcg_at_10",
+        date=("2025-10-01", "2025-11-01"),
+        domains=["Engineering", "Chemistry"],
+        task_subtypes=["Image Text Retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="created and machine-translated",
+        bibtex_citation=r"""
+@misc{mace2025vidorev3,
+  author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
+  day = {5},
+  howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
+  journal = {Hugging Face Blog},
+  month = {November},
+  publisher = {Hugging Face},
+  title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
+  year = {2025},
+}
+""",
+        prompt={"query": "Find a screenshot that is relevant to the user's question."},
+        is_public=False,
+    )
+
+
+class Vidore3TelecomRetrieval(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="Vidore3TelecomRetrieval",
+        description="Retrieve associated pages according to questions.",
+        reference="https://huggingface.co/blog/QuentinJG/introducing-vidore-v3",
+        dataset={
+            "path": "mteb-private/Vidore3TelecomRetrieval",
+            "revision": "a54635a274ef2835721b7cbe3eb27483b9ec964b",
+        },
+        type="DocumentUnderstanding",
+        category="t2i",
+        eval_splits=["test"],
+        eval_langs=_LANGS,
+        main_score="ndcg_at_10",
+        date=("2025-10-01", "2025-11-01"),
+        domains=["Engineering", "Programming"],
+        task_subtypes=["Image Text Retrieval"],
+        license="cc-by-4.0",
+        annotations_creators="derived",
+        dialect=[],
+        modalities=["text", "image"],
+        sample_creation="created and machine-translated",
+        bibtex_citation=r"""
+@misc{mace2025vidorev3,
+  author = {Macé, Quentin and Loison, Antonio and EDY, Antoine and Xing, Victor and Viaud, Gautier},
+  day = {5},
+  howpublished = {\url{https://huggingface.co/blog/QuentinJG/introducing-vidore-v3}},
+  journal = {Hugging Face Blog},
+  month = {November},
+  publisher = {Hugging Face},
+  title = {ViDoRe V3: a comprehensive evaluation of retrieval for enterprise use-cases},
+  year = {2025},
+}
+""",
+        prompt={"query": "Find a screenshot that is relevant to the user's question."},
+        is_public=False,
+    )

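Every ViDoRe v3 task shares the six-language `_LANGS` mapping, and two of the ten (`Vidore3NuclearRetrieval`, `Vidore3TelecomRetrieval`) point at `mteb-private` datasets with `is_public=False`. A hedged sketch of restricting a run to one query language of a public task, assuming `get_tasks` accepts a `languages` filter as in the mteb docs:

```python
import mteb

# Restrict to the French queries of one public task; the "languages" filter
# keyword is assumed from the mteb docs, not taken from this diff.
tasks = mteb.get_tasks(tasks=["Vidore3HrRetrieval"], languages=["fra"])
```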
mteb/tasks/retrieval/slk/slovak_sum_retrieval.py

@@ -7,13 +7,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class SlovakSumRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="SlovakSumRetrieval",
-        description="""
-        SlovakSum, a Slovak news summarization dataset consisting of over 200 thousand
-        news articles with titles and short abstracts obtained from multiple Slovak newspapers.
-
-        Originally intended as a summarization task, but since no human annotations were provided
-        here reformulated to a retrieval task.
-        """,
+        description="SlovakSum, a Slovak news summarization dataset consisting of over 200 thousand news articles with titles and short abstracts obtained from multiple Slovak newspapers. Originally intended as a summarization task, but since no human annotations were provided here reformulated to a retrieval task.",
         reference="https://huggingface.co/datasets/NaiveNeuron/slovaksum",
         dataset={
             "path": "NaiveNeuron/slovaksum",

mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py

@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class ArguAnaVN(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="ArguAna-VN",
-        description="""
-        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-        - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-        - Applies advanced embedding models to filter the translations.
-        - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from NFCorpus: A Full-Text Learning to Rank Dataset for Medical Information Retrieval The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="http://argumentation.bplaced.net/arguana/data",
         dataset={
             "path": "GreenNode/arguana-vn",

mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py

@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class ClimateFEVERVN(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="ClimateFEVER-VN",
-        description="""
-        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-        - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-        - Applies advanced embedding models to filter the translations.
-        - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from CLIMATE-FEVER is a dataset adopting the FEVER methodology that consists of 1,535 real-world claims regarding climate-change. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html",
         dataset={
             "path": "GreenNode/climate-fever-vn",

mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py

@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class CQADupstackAndroidVN(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="CQADupstackAndroid-VN",
-        description="""
-        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-        - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-        - Applies advanced embedding models to filter the translations.
-        - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
         dataset={
             "path": "GreenNode/cqadupstack-android-vn",

mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py

@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class CQADupstackGisVN(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="CQADupstackGis-VN",
-        description="""
-        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-        - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-        - Applies advanced embedding models to filter the translations.
-        - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from CQADupStack: A Benchmark Data Set for Community Question-Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="http://nlp.cis.unimelb.edu.au/resources/cqadupstack/",
         dataset={
             "path": "GreenNode/cqadupstack-gis-vn",