mteb 2.1.6__py3-none-any.whl → 2.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/_create_dataloaders.py +6 -3
- mteb/_evaluators/any_sts_evaluator.py +14 -12
- mteb/_evaluators/clustering_evaluator.py +1 -1
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +2 -2
- mteb/_evaluators/pair_classification_evaluator.py +3 -1
- mteb/_evaluators/sklearn_evaluator.py +15 -28
- mteb/_evaluators/text/bitext_mining_evaluator.py +4 -1
- mteb/_evaluators/text/summarization_evaluator.py +4 -2
- mteb/_evaluators/zeroshot_classification_evaluator.py +2 -2
- mteb/abstasks/clustering.py +1 -1
- mteb/abstasks/multilabel_classification.py +2 -2
- mteb/abstasks/task_metadata.py +1 -0
- mteb/benchmarks/benchmark.py +9 -0
- mteb/benchmarks/benchmarks/__init__.py +2 -0
- mteb/benchmarks/benchmarks/benchmarks.py +40 -1
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3ComputerScienceRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3EnergyRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceEnRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3FinanceFrRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3HrRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3IndustrialRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3NuclearRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PharmaceuticalsRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3PhysicsRetrieval.json +214 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore3TelecomRetrieval.json +214 -0
- mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
- mteb/models/cache_wrappers/cache_wrapper.py +1 -1
- mteb/models/model_implementations/align_models.py +6 -0
- mteb/models/model_implementations/ara_models.py +7 -0
- mteb/models/model_implementations/blip2_models.py +9 -0
- mteb/models/model_implementations/blip_models.py +19 -0
- mteb/models/model_implementations/cadet_models.py +8 -0
- mteb/models/model_implementations/cde_models.py +12 -0
- mteb/models/model_implementations/codefuse_models.py +15 -0
- mteb/models/model_implementations/codesage_models.py +12 -0
- mteb/models/model_implementations/misc_models.py +6 -0
- mteb/models/model_implementations/moco_models.py +9 -0
- mteb/models/model_implementations/openclip_models.py +16 -0
- mteb/models/model_implementations/piccolo_models.py +6 -0
- mteb/models/model_implementations/rasgaard_models.py +7 -1
- mteb/models/model_implementations/tarka_models.py +317 -0
- mteb/models/search_wrappers.py +5 -5
- mteb/tasks/bitext_mining/multilingual/ru_sci_bench_bitext_mining.py +1 -5
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -6
- mteb/tasks/classification/ara/ajgt.py +1 -2
- mteb/tasks/classification/ara/hotel_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ara/tweet_emotion_classification.py +1 -2
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_document_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -2
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -2
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ces/czech_product_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/ces/czech_so_me_sentiment_classification.py +1 -2
- mteb/tasks/classification/dan/angry_tweets_classification.py +1 -2
- mteb/tasks/classification/dan/danish_political_comments_classification.py +1 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -2
- mteb/tasks/classification/dan/dk_hate_classification.py +1 -2
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -2
- mteb/tasks/classification/deu/ten_k_gnad_classification.py +1 -2
- mteb/tasks/classification/eng/amazon_polarity_classification.py +1 -2
- mteb/tasks/classification/eng/arxiv_classification.py +1 -2
- mteb/tasks/classification/eng/banking77_classification.py +1 -2
- mteb/tasks/classification/eng/dbpedia_classification.py +1 -2
- mteb/tasks/classification/eng/emotion_classification.py +1 -2
- mteb/tasks/classification/eng/financial_phrasebank_classification.py +1 -2
- mteb/tasks/classification/eng/frenk_en_classification.py +1 -2
- mteb/tasks/classification/eng/gtsrb_classification.py +1 -1
- mteb/tasks/classification/eng/imdb_classification.py +1 -2
- mteb/tasks/classification/eng/legal_bench_classification.py +14 -120
- mteb/tasks/classification/eng/news_classification.py +1 -2
- mteb/tasks/classification/eng/patch_camelyon_classification.py +1 -1
- mteb/tasks/classification/eng/patent_classification.py +1 -2
- mteb/tasks/classification/eng/poem_sentiment_classification.py +1 -2
- mteb/tasks/classification/eng/sds_eye_protection_classification.py +1 -2
- mteb/tasks/classification/eng/sds_gloves_classification.py +1 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -19
- mteb/tasks/classification/eng/toxic_conversations_classification.py +1 -2
- mteb/tasks/classification/eng/tweet_sentiment_extraction_classification.py +1 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +2 -13
- mteb/tasks/classification/eng/ucf101_classification.py +1 -5
- mteb/tasks/classification/eng/wikipedia_bio_met_chem_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_chem_fields_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_comp_chem_spectroscopy_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_crystallography_analytical_classification.py +1 -2
- mteb/tasks/classification/eng/wikipedia_theoretical_applied_classification.py +1 -2
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -2
- mteb/tasks/classification/eng/yelp_review_full_classification.py +1 -2
- mteb/tasks/classification/est/estonian_valence.py +1 -2
- mteb/tasks/classification/fas/fa_mteb_classification.py +7 -14
- mteb/tasks/classification/fil/filipino_hate_speech_classification.py +1 -2
- mteb/tasks/classification/fin/fin_toxicity_classification.py +2 -11
- mteb/tasks/classification/fra/french_book_reviews.py +1 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -2
- mteb/tasks/classification/heb/hebrew_sentiment_analysis.py +1 -2
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -2
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -2
- mteb/tasks/classification/hrv/frenk_hr_classification.py +1 -2
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +1 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -2
- mteb/tasks/classification/ita/italian_linguist_acceptability_classification.py +1 -2
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -2
- mteb/tasks/classification/jpn/wrime_classification.py +1 -2
- mteb/tasks/classification/kan/kannada_news_classification.py +1 -2
- mteb/tasks/classification/kor/klue_tc.py +1 -2
- mteb/tasks/classification/kor/kor_hate_classification.py +2 -17
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +2 -19
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +1 -2
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -2
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -2
- mteb/tasks/classification/mkd/macedonian_tweet_sentiment_classification.py +1 -2
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -6
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -4
- mteb/tasks/classification/multilingual/ru_sci_bench_classification.py +4 -23
- mteb/tasks/classification/multilingual/scala_classification.py +1 -2
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -6
- mteb/tasks/classification/mya/myanmar_news.py +1 -2
- mteb/tasks/classification/nep/nepali_news_classification.py +1 -2
- mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +4 -2
- mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
- mteb/tasks/classification/nld/iconclass_classification.py +3 -0
- mteb/tasks/classification/nld/open_tender_classification.py +3 -0
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
- mteb/tasks/classification/nob/no_rec_classification.py +1 -2
- mteb/tasks/classification/nob/norwegian_parliament_classification.py +1 -2
- mteb/tasks/classification/ory/odia_news_classification.py +1 -2
- mteb/tasks/classification/pol/polish_classification.py +3 -6
- mteb/tasks/classification/ron/moroco.py +1 -2
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -2
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -2
- mteb/tasks/classification/rus/georeview_classification.py +1 -2
- mteb/tasks/classification/rus/headline_classification.py +1 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +1 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +1 -2
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -2
- mteb/tasks/classification/rus/senti_ru_eval.py +1 -2
- mteb/tasks/classification/sin/sinhala_news_classification.py +1 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +1 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/slk/slovak_hate_speech_classification.py +1 -2
- mteb/tasks/classification/slk/slovak_movie_review_sentiment_classification.py +1 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -2
- mteb/tasks/classification/spa/spanish_news_classification.py +1 -2
- mteb/tasks/classification/spa/spanish_sentiment_classification.py +1 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -2
- mteb/tasks/classification/swa/swahili_news_classification.py +1 -2
- mteb/tasks/classification/swe/dalaj_classification.py +1 -2
- mteb/tasks/classification/swe/swe_rec_classification.py +1 -2
- mteb/tasks/classification/swe/swedish_sentiment_classification.py +1 -2
- mteb/tasks/classification/tam/tamil_news_classification.py +1 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +1 -2
- mteb/tasks/classification/tha/wisesight_sentiment_classification.py +1 -2
- mteb/tasks/classification/tsn/tswana_news_classification.py +1 -2
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +1 -2
- mteb/tasks/classification/tur/turkish_product_sentiment_classification.py +1 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -15
- mteb/tasks/classification/urd/urdu_roman_sentiment_classification.py +1 -2
- mteb/tasks/classification/vie/amazon_counterfactual_vn_classification.py +1 -6
- mteb/tasks/classification/vie/amazon_polarity_vn_classification.py +1 -6
- mteb/tasks/classification/vie/amazon_reviews_vn_classification.py +1 -5
- mteb/tasks/classification/vie/banking77_vn_classification.py +1 -5
- mteb/tasks/classification/vie/emotion_vn_classification.py +1 -5
- mteb/tasks/classification/vie/imdb_vn_classification.py +1 -5
- mteb/tasks/classification/vie/massive_intent_vn_classification.py +1 -5
- mteb/tasks/classification/vie/massive_scenario_vn_classification.py +1 -5
- mteb/tasks/classification/vie/mtop_domain_vn_classification.py +1 -5
- mteb/tasks/classification/vie/mtop_intent_vn_classification.py +1 -5
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -5
- mteb/tasks/classification/vie/tweet_sentiment_extraction_vn_classification.py +1 -5
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -2
- mteb/tasks/classification/zho/cmteb_classification.py +5 -10
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +1 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -2
- mteb/tasks/clustering/jpn/mews_c16_ja_clustering.py +1 -3
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -6
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
- mteb/tasks/clustering/vie/reddit_clustering_p2p_vn.py +1 -5
- mteb/tasks/clustering/vie/reddit_clustering_vn.py +1 -5
- mteb/tasks/clustering/vie/stack_exchange_clustering_p2p_vn.py +1 -5
- mteb/tasks/clustering/vie/stack_exchange_clustering_vn.py +1 -5
- mteb/tasks/clustering/vie/twenty_newsgroups_clustering_vn.py +1 -5
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -5
- mteb/tasks/multilabel_classification/kor/kor_hate_speech_ml_classification.py +1 -9
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -6
- mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
- mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
- mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py +1 -6
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -2
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -6
- mteb/tasks/pair_classification/eng/legal_bench_pc.py +1 -9
- mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
- mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -5
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -5
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -5
- mteb/tasks/regression/multilingual/ru_sci_bench_regression.py +2 -6
- mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py +1 -2
- mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py +1 -5
- mteb/tasks/reranking/vie/sci_docs_reranking_vn.py +1 -5
- mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py +1 -5
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -8
- mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py +1 -4
- mteb/tasks/retrieval/multilingual/__init__.py +22 -0
- mteb/tasks/retrieval/multilingual/mkqa_retrieval.py +1 -2
- mteb/tasks/retrieval/multilingual/mlqa_retrieval.py +1 -4
- mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py +1 -2
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +2 -12
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +399 -0
- mteb/tasks/retrieval/nld/__init__.py +8 -4
- mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
- mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
- mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
- mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
- mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
- mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -7
- mteb/tasks/retrieval/vie/argu_ana_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_android_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_gis_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_mathematica_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_physics_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_programmers_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_stats_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_tex_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_unix_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_webmasters_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/cqa_dupstack_wordpress_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +1 -7
- mteb/tasks/retrieval/vie/fi_qa2018_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/nf_corpus_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/quora_vn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/sci_fact_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/scidocsvn_retrieval.py +1 -6
- mteb/tasks/retrieval/vie/touche2020_vn_retrieval.py +1 -5
- mteb/tasks/retrieval/vie/treccovidvn_retrieval.py +1 -5
- mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
- mteb/tasks/sts/vie/biosses_stsvn.py +1 -5
- mteb/tasks/sts/vie/sickr_stsvn.py +1 -5
- mteb/tasks/sts/vie/sts_benchmark_stsvn.py +1 -5
- mteb/tasks/zeroshot_classification/eng/gtsrb.py +1 -1
- mteb/tasks/zeroshot_classification/eng/patch_camelyon.py +1 -1
- mteb/tasks/zeroshot_classification/eng/ucf101.py +1 -5
- {mteb-2.1.6.dist-info → mteb-2.1.8.dist-info}/METADATA +1 -1
- {mteb-2.1.6.dist-info → mteb-2.1.8.dist-info}/RECORD +272 -257
- {mteb-2.1.6.dist-info → mteb-2.1.8.dist-info}/WHEEL +0 -0
- {mteb-2.1.6.dist-info → mteb-2.1.8.dist-info}/entry_points.txt +0 -0
- {mteb-2.1.6.dist-info → mteb-2.1.8.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.1.6.dist-info → mteb-2.1.8.dist-info}/top_level.txt +0 -0
mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py
@@ -7,8 +7,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class SwedishPatentCPCSubclassClassification(AbsTaskMultilabelClassification):
     metadata = TaskMetadata(
         name="SwedishPatentCPCSubclassClassification",
-        description="
-        The dataset includes patent claims text extracted from digitally recreated versions of historical Swedish patents, generated using Optical Character Recognition (OCR) from original paper documents. The text quality varies due to OCR limitations, but all CPC labels were manually assigned by patent engineers at PRV (Swedish Patent and Registration Office), ensuring high reliability for machine learning applications.""",
+        description="This dataset contains historical Swedish patent documents (1885-1972) classified according to the Cooperative Patent Classification (CPC) system. Each document can have multiple labels, making this a multi-label classification task with significant implications for patent retrieval and prior art search. The dataset includes patent claims text extracted from digitally recreated versions of historical Swedish patents, generated using Optical Character Recognition (OCR) from original paper documents. The text quality varies due to OCR limitations, but all CPC labels were manually assigned by patent engineers at PRV (Swedish Patent and Registration Office), ensuring high reliability for machine learning applications.",
         reference="https://urn.kb.se/resolve?urn=urn:nbn:se:kth:diva-368254",
         type="MultilabelClassification",
         category="t2t",
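The change above is representative of this release: multi-line TaskMetadata description strings are flattened into single-line strings across the task files that follow. A minimal sketch of how such a description surfaces at runtime, assuming the top-level mteb.get_task helper available in recent mteb releases (the task name is taken from the hunk above):

    import mteb

    # Load the task definition shown above; after this release metadata.description
    # is a single flattened string rather than a multi-line string with embedded newlines.
    task = mteb.get_task("SwedishPatentCPCSubclassClassification")
    print(task.metadata.description)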
mteb/tasks/pair_classification/dan/talemaader_pc.py
@@ -5,12 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class TalemaaderPC(AbsTaskPairClassification):
     metadata = TaskMetadata(
         name="TalemaaderPC",
-        description=""
-        The Danish Language and Literature Society has developed a dataset for evaluating language models in Danish.
-        The dataset contains a total of 1000 Danish idioms and fixed expressions with transferred meanings based on the Danish Dictionary's collection of fixed expressions with associated definitions.
-        For each of the 1000 idioms and fixed expressions, three false definitions have also been prepared.
-        The dataset can be used to test the performance of language models in identifying correct definitions for Danish idioms and fixed expressions.
-        """,
+        description="\\ The Danish Language and Literature Society has developed a dataset for evaluating language models in Danish. The dataset contains a total of 1000 Danish idioms and fixed expressions with transferred meanings based on the Danish Dictionary's collection of fixed expressions with associated definitions. For each of the 1000 idioms and fixed expressions, three false definitions have also been prepared. The dataset can be used to test the performance of language models in identifying correct definitions for Danish idioms and fixed expressions.",
         reference="https://sprogteknologi.dk/dataset/1000-talemader-evalueringsdatasaet",
         dataset={
             "path": "mteb/talemaader_pc",
mteb/tasks/pair_classification/eng/legal_bench_pc.py
@@ -50,15 +50,7 @@ _DATASET_COLUMN_MAP = [
 class LegalBenchPC(AbsTaskPairClassification):
     metadata = TaskMetadata(
         name="LegalBenchPC",
-        description="
-
-        - Citation Prediction Classification: Given a legal statement and a case citation, determine if the citation is supportive of the legal statement.
-        - Consumer Contracts QA: The task consists of 400 yes/no questions relating to consumer contracts (specifically, online terms of service) and is relevant to the legal skill of contract interpretation.
-        - Contract QA: Answer yes/no questions about whether contractual clauses discuss particular issues like confidentiality requirements, BIPA consent, PII data breaches, breach of contract etc.
-        - Hearsay: Classify if a particular piece of evidence qualifies as hearsay. Each sample in the dataset describes (1) an issue being litigated or an assertion a party wishes to prove, and (2) a piece of evidence a party wishes to introduce. The goal is to determine if—as it relates to the issue—the evidence would be considered hearsay under the definition provided above.
-        - Privacy Policy Entailment: Given a privacy policy clause and a description of the clause, determine if the description is correct. This is a binary classification task in which the LLM is provided with a clause from a privacy policy, and a description of that clause (e.g., “The policy describes collection of the user’s HTTP cookies, flash cookies, pixel tags, or similar identifiers by a party to the contract.”).
-        - Privacy Policy QA: Given a question and a clause from a privacy policy, determine if the clause contains enough information to answer the question. This is a binary classification task in which the LLM is provided with a question (e.g., “do you publish my data”) and a clause from a privacy policy. The LLM must determine if the clause contains an answer to the question, and classify the question-clause pair.
-        """,
+        description="This LegalBench pair classification task is a combination of the following datasets: - Citation Prediction Classification: Given a legal statement and a case citation, determine if the citation is supportive of the legal statement. - Consumer Contracts QA: The task consists of 400 yes/no questions relating to consumer contracts (specifically, online terms of service) and is relevant to the legal skill of contract interpretation. - Contract QA: Answer yes/no questions about whether contractual clauses discuss particular issues like confidentiality requirements, BIPA consent, PII data breaches, breach of contract etc. - Hearsay: Classify if a particular piece of evidence qualifies as hearsay. Each sample in the dataset describes (1) an issue being litigated or an assertion a party wishes to prove, and (2) a piece of evidence a party wishes to introduce. The goal is to determine if—as it relates to the issue—the evidence would be considered hearsay under the definition provided above. - Privacy Policy Entailment: Given a privacy policy clause and a description of the clause, determine if the description is correct. This is a binary classification task in which the LLM is provided with a clause from a privacy policy, and a description of that clause (e.g., “The policy describes collection of the user’s HTTP cookies, flash cookies, pixel tags, or similar identifiers by a party to the contract.”). - Privacy Policy QA: Given a question and a clause from a privacy policy, determine if the clause contains enough information to answer the question. This is a binary classification task in which the LLM is provided with a question (e.g., “do you publish my data”) and a clause from a privacy policy. The LLM must determine if the clause contains an answer to the question, and classify the question-clause pair.",
         reference="https://huggingface.co/datasets/nguha/legalbench",
         dataset={
             "path": "mteb/LegalBenchPC",
mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class SprintDuplicateQuestionsPCVN(AbsTaskPairClassification):
     metadata = TaskMetadata(
         name="SprintDuplicateQuestions-VN",
-        description="
-        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-        - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-        - Applies advanced embedding models to filter the translations.
-        - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from Duplicate questions from the Sprint community. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="https://www.aclweb.org/anthology/D18-1131/",
         dataset={
             "path": "GreenNode/sprintduplicatequestions-pairclassification-vn",
mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py
@@ -9,11 +9,7 @@ class TwitterSemEval2015PCVN(AbsTaskPairClassification):
             "path": "GreenNode/twittersemeval2015-pairclassification-vn",
             "revision": "9215a3c954078fd15c2bbecca914477d53944de1",
         },
-        description="
-        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-        - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-        - Applies advanced embedding models to filter the translations.
-        - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from Paraphrase-Pairs of Tweets from the SemEval 2015 workshop. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="https://alt.qcri.org/semeval2015/task1/",
         category="t2c",
         type="PairClassification",
mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py
@@ -9,11 +9,7 @@ class TwitterURLCorpusPC(AbsTaskPairClassification):
             "path": "GreenNode/twitterurlcorpus-pairclassification-vn",
             "revision": "6e6a40aaade2129f70432f2156a6d24b63d72be3",
         },
-        description="
-        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-        - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-        - Applies advanced embedding models to filter the translations.
-        - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from Paraphrase-Pairs of Tweets. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="https://languagenet.github.io/",
         category="t2c",
         type="PairClassification",
mteb/tasks/regression/multilingual/ru_sci_bench_regression.py
@@ -5,9 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class RuSciBenchCitedCountRegression(AbsTaskRegression):
     metadata = TaskMetadata(
         name="RuSciBenchCitedCountRegression",
-        description="
-        The prediction is based on the article's title and abstract. The data is sourced from the Russian electronic
-        library of scientific publications (eLibrary.ru) and includes papers with both Russian and English abstracts.""",
+        description="Predicts the number of times a scientific article has been cited by other papers. The prediction is based on the article's title and abstract. The data is sourced from the Russian electronic library of scientific publications (eLibrary.ru) and includes papers with both Russian and English abstracts.",
         reference="https://github.com/mlsa-iai-msu-lab/ru_sci_bench_mteb",
         dataset={
             "path": "mlsa-iai-msu-lab/ru_sci_bench_mteb",
@@ -51,9 +49,7 @@ class RuSciBenchCitedCountRegression(AbsTaskRegression):
 class RuSciBenchYearPublRegression(AbsTaskRegression):
     metadata = TaskMetadata(
         name="RuSciBenchYearPublRegression",
-        description="
-        article's title and abstract. The data is sourced from the Russian electronic library of scientific
-        publications (eLibrary.ru) and includes papers with both Russian and English abstracts.""",
+        description="Predicts the publication year of a scientific article. The prediction is based on the article's title and abstract. The data is sourced from the Russian electronic library of scientific publications (eLibrary.ru) and includes papers with both Russian and English abstracts.",
         reference="https://github.com/mlsa-iai-msu-lab/ru_sci_bench_mteb",
         dataset={
             "path": "mlsa-iai-msu-lab/ru_sci_bench_mteb",
mteb/tasks/reranking/multilingual/x_glue_wpr_reranking.py
@@ -78,8 +78,7 @@ _CITATION = r"""
 class XGlueWPRReranking(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="XGlueWPRReranking",
-        description="
-        with respect to cross-lingual natural language understanding and generation. XGLUE is composed of 11 tasks spans 19 languages.""",
+        description="XGLUE is a new benchmark dataset to evaluate the performance of cross-lingual pre-trained models with respect to cross-lingual natural language understanding and generation. XGLUE is composed of 11 tasks spans 19 languages.",
         reference="https://github.com/microsoft/XGLUE",
         dataset={
             "path": "mteb/XGlueWPRReranking",
mteb/tasks/reranking/vie/ask_ubuntu_dup_questions_vn.py
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class AskUbuntuDupQuestionsVN(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="AskUbuntuDupQuestions-VN",
-        description="
-        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-        - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-        - Applies advanced embedding models to filter the translations.
-        - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from AskUbuntu Question Dataset - Questions from AskUbuntu with manual annotations marking pairs of questions as similar or non-similar The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="https://github.com/taolei87/askubuntu",
         dataset={
             "path": "mteb/AskUbuntuDupQuestions-VN",
mteb/tasks/reranking/vie/sci_docs_reranking_vn.py
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class SciDocsRerankingVN(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="SciDocsRR-VN",
-        description="
-        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-        - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-        - Applies advanced embedding models to filter the translations.
-        - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from Ranking of related scientific papers based on their title. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="https://allenai.org/data/scidocs",
         dataset={
             "path": "mteb/SciDocsRR-VN",
mteb/tasks/reranking/vie/stack_overflow_dup_questions_vn.py
@@ -5,11 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class StackOverflowDupQuestionsVN(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="StackOverflowDupQuestions-VN",
-        description="
-        The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system:
-        - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation.
-        - Applies advanced embedding models to filter the translations.
-        - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.""",
+        description="A translated dataset from Stack Overflow Duplicate Questions Task for questions with the tags Java, JavaScript and Python The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
         reference="https://www.microsoft.com/en-us/research/uploads/prod/2019/03/nl4se18LinkSO.pdf",
         dataset={
             "path": "mteb/StackOverflowDupQuestions-VN",
mteb/tasks/retrieval/eng/lit_search_retrieval.py
@@ -7,14 +7,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class LitSearchRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="LitSearchRetrieval",
-        description=""
-        The dataset contains the query set and retrieval corpus for the paper LitSearch: A Retrieval Benchmark for
-        Scientific Literature Search. It introduces LitSearch, a retrieval benchmark comprising 597 realistic literature
-        search queries about recent ML and NLP papers. LitSearch is constructed using a combination of (1) questions
-        generated by GPT-4 based on paragraphs containing inline citations from research papers and (2) questions about
-        recently published papers, manually written by their authors. All LitSearch questions were manually examined or
-        edited by experts to ensure high quality.
-        """,
+        description="The dataset contains the query set and retrieval corpus for the paper LitSearch: A Retrieval Benchmark for Scientific Literature Search. It introduces LitSearch, a retrieval benchmark comprising 597 realistic literature search queries about recent ML and NLP papers. LitSearch is constructed using a combination of (1) questions generated by GPT-4 based on paragraphs containing inline citations from research papers and (2) questions about recently published papers, manually written by their authors. All LitSearch questions were manually examined or edited by experts to ensure high quality.",
         reference="https://github.com/princeton-nlp/LitSearch",
         dataset={
             "path": "princeton-nlp/LitSearch",
mteb/tasks/retrieval/jpn/ja_cwir_retrieval.py
@@ -9,10 +9,7 @@ class JaCWIRRetrieval(AbsTaskRetrieval):
 
     metadata = TaskMetadata(
         name="JaCWIRRetrieval",
-        description="
-        5000 question texts and approximately 500k web page titles and web page introductions or summaries
-        (meta descriptions, etc.). The question texts are created based on one of the 500k web pages,
-        and that data is used as a positive example for the question text.""",
+        description="JaCWIR is a small-scale Japanese information retrieval evaluation dataset consisting of 5000 question texts and approximately 500k web page titles and web page introductions or summaries (meta descriptions, etc.). The question texts are created based on one of the 500k web pages, and that data is used as a positive example for the question text.",
         reference="https://huggingface.co/datasets/hotchpotch/JaCWIR",
         dataset={
             "path": "mteb/JaCWIRRetrieval",
mteb/tasks/retrieval/multilingual/__init__.py
@@ -81,6 +81,18 @@ from .vidore2_bench_retrieval import (
     Vidore2ESGReportsHLRetrieval,
     Vidore2ESGReportsRetrieval,
 )
+from .vidore3_bench_retrieval import (
+    Vidore3ComputerScienceRetrieval,
+    Vidore3EnergyRetrieval,
+    Vidore3FinanceEnRetrieval,
+    Vidore3FinanceFrRetrieval,
+    Vidore3HrRetrieval,
+    Vidore3IndustrialRetrieval,
+    Vidore3NuclearRetrieval,
+    Vidore3PharmaceuticalsRetrieval,
+    Vidore3PhysicsRetrieval,
+    Vidore3TelecomRetrieval,
+)
 from .web_faq_retrieval import WebFAQRetrieval
 from .wikipedia_retrieval_multilingual import WikipediaRetrievalMultilingual
 from .wit_t2i_retrieval import WITT2IRetrieval
@@ -161,6 +173,16 @@ __all__ = [
     "Vidore2ESGReportsHLRetrieval",
     "Vidore2ESGReportsRetrieval",
     "Vidore2EconomicsReportsRetrieval",
+    "Vidore3ComputerScienceRetrieval",
+    "Vidore3EnergyRetrieval",
+    "Vidore3FinanceEnRetrieval",
+    "Vidore3FinanceFrRetrieval",
+    "Vidore3HrRetrieval",
+    "Vidore3IndustrialRetrieval",
+    "Vidore3NuclearRetrieval",
+    "Vidore3PharmaceuticalsRetrieval",
+    "Vidore3PhysicsRetrieval",
+    "Vidore3TelecomRetrieval",
     "WITT2IRetrieval",
     "WebFAQRetrieval",
     "WikipediaRetrievalMultilingual",
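With the imports and __all__ entries above, the ten new ViDoRe v3 document-understanding retrieval tasks are exposed alongside the existing Vidore2 tasks. A minimal sketch of loading two of them, assuming the mteb.get_tasks helper available in recent mteb releases (the task names are the newly registered class names shown above):

    import mteb

    # Load two of the Vidore3 tasks added in this release and print their metadata.
    tasks = mteb.get_tasks(tasks=["Vidore3ComputerScienceRetrieval", "Vidore3EnergyRetrieval"])
    for task in tasks:
        print(task.metadata.name, task.metadata.type)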
mteb/tasks/retrieval/multilingual/mkqa_retrieval.py
@@ -34,8 +34,7 @@ _EVAL_LANGS = {
 class MKQARetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="MKQARetrieval",
-        description="
-        For each query we collect new passage-independent answers. These queries and answers are then human translated into 25 Non-English languages.""",
+        description="Multilingual Knowledge Questions & Answers (MKQA)contains 10,000 queries sampled from the Google Natural Questions dataset. For each query we collect new passage-independent answers. These queries and answers are then human translated into 25 Non-English languages.",
         reference="https://github.com/apple/ml-mkqa",
         dataset={
             "path": "mteb/MKQARetrieval",
mteb/tasks/retrieval/multilingual/mlqa_retrieval.py
@@ -75,10 +75,7 @@ _EVAL_LANGS = extend_lang_pairs()
 class MLQARetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="MLQARetrieval",
-        description="
-        MLQA consists of over 5K extractive QA instances (12K in English) in SQuAD format in seven languages - English, Arabic,
-        German, Spanish, Hindi, Vietnamese and Simplified Chinese. MLQA is highly parallel, with QA instances parallel between
-        4 different languages on average.""",
+        description="MLQA (MultiLingual Question Answering) is a benchmark dataset for evaluating cross-lingual question answering performance. MLQA consists of over 5K extractive QA instances (12K in English) in SQuAD format in seven languages - English, Arabic, German, Spanish, Hindi, Vietnamese and Simplified Chinese. MLQA is highly parallel, with QA instances parallel between 4 different languages on average.",
         reference="https://huggingface.co/datasets/mlqa",
         dataset={
             "path": "mteb/MLQARetrieval",
mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py
@@ -21,8 +21,7 @@ _LANGUAGES = {
 class MultiLongDocRetrieval(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="MultiLongDocRetrieval",
-        description="
-        It is constructed by sampling lengthy articles from Wikipedia, Wudao and mC4 datasets and randomly choose paragraphs from them. Then we use GPT-3.5 to generate questions based on these paragraphs. The generated question and the sampled article constitute a new text pair to the dataset.""",
+        description="Multi Long Doc Retrieval (MLDR) 'is curated by the multilingual articles from Wikipedia, Wudao and mC4 (see Table 7), and NarrativeQA (Kocˇisky ́ et al., 2018; Gu ̈nther et al., 2023), which is only for English.' (Chen et al., 2024). It is constructed by sampling lengthy articles from Wikipedia, Wudao and mC4 datasets and randomly choose paragraphs from them. Then we use GPT-3.5 to generate questions based on these paragraphs. The generated question and the sampled article constitute a new text pair to the dataset.",
         reference="https://arxiv.org/abs/2402.03216", # also: https://huggingface.co/datasets/Shitao/MLDR
         dataset={
             "path": "mteb/MultiLongDocRetrieval",
mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py
@@ -68,11 +68,7 @@ class RuSciBenchCiteRetrieval(AbsTaskRetrieval):
             "path": "mlsa-iai-msu-lab/ru_sci_bench_cite_retrieval",
             "revision": "6cb447d02f41b8b775d5d9df7faf472f44d2f1db",
         },
-        description="
-        Russia's largest electronic library of scientific publications. Given a query paper (title and abstract),
-        the goal is to retrieve papers that are directly cited by it from a larger corpus of papers.
-        The dataset for this task consists of 3,000 query papers, 15,000 relevant (cited) papers,
-        and 75,000 irrelevant papers. The task is available for both Russian and English scientific texts.""",
+        description="This task is focused on Direct Citation Prediction for scientific papers from eLibrary, Russia's largest electronic library of scientific publications. Given a query paper (title and abstract), the goal is to retrieve papers that are directly cited by it from a larger corpus of papers. The dataset for this task consists of 3,000 query papers, 15,000 relevant (cited) papers, and 75,000 irrelevant papers. The task is available for both Russian and English scientific texts.",
         reference="https://github.com/mlsa-iai-msu-lab/ru_sci_bench_mteb",
         type="Retrieval",
         category="t2t",
@@ -130,13 +126,7 @@ class RuSciBenchCociteRetrieval(AbsTaskRetrieval):
             "path": "mlsa-iai-msu-lab/ru_sci_bench_cocite_retrieval",
             "revision": "a5da47a245275669d2b6ddf8f96c5338dd2428b4",
         },
-        description="
-        Russia's largest electronic library of scientific publications. Given a query paper (title and abstract),
-        the goal is to retrieve other papers that are co-cited with it. Two papers are considered co-cited
-        if they are both cited by at least 5 of the same other papers. Similar to the Direct Citation task,
-        this task employs a retrieval setup: for a given query paper, all other papers in the corpus that
-        are not co-cited with it are considered negative examples. The task is available for both Russian
-        and English scientific texts.""",
+        description="This task focuses on Co-citation Prediction for scientific papers from eLibrary, Russia's largest electronic library of scientific publications. Given a query paper (title and abstract), the goal is to retrieve other papers that are co-cited with it. Two papers are considered co-cited if they are both cited by at least 5 of the same other papers. Similar to the Direct Citation task, this task employs a retrieval setup: for a given query paper, all other papers in the corpus that are not co-cited with it are considered negative examples. The task is available for both Russian and English scientific texts.",
         reference="https://github.com/mlsa-iai-msu-lab/ru_sci_bench_mteb",
         type="Retrieval",
         category="t2t",
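The two RuSciBench retrieval hunks above keep their dataset paths and revisions; only the descriptions are rewritten as single-line strings. A minimal sketch of inspecting the updated metadata, assuming the mteb.get_task helper and that the registered task names match the class names shown above:

    import mteb

    # Print the task type, category and the start of the new flattened description.
    for name in ["RuSciBenchCiteRetrieval", "RuSciBenchCociteRetrieval"]:
        task = mteb.get_task(name)
        print(name, task.metadata.type, task.metadata.category)
        print(task.metadata.description[:80], "...")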