mteb 2.0.5__py3-none-any.whl → 2.1.1__py3-none-any.whl
This diff shows the content changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- mteb/__init__.py +10 -1
- mteb/_create_dataloaders.py +2 -0
- mteb/abstasks/_stratification.py +1 -1
- mteb/abstasks/abstask.py +6 -1
- mteb/abstasks/dataset_card_template.md +1 -1
- mteb/abstasks/retrieval.py +2 -1
- mteb/abstasks/retrieval_dataset_loaders.py +1 -1
- mteb/abstasks/task_metadata.py +1 -1
- mteb/benchmarks/benchmarks/__init__.py +2 -0
- mteb/benchmarks/benchmarks/benchmarks.py +82 -11
- mteb/benchmarks/get_benchmark.py +1 -1
- mteb/descriptive_stats/Classification/DutchColaClassification.json +54 -0
- mteb/descriptive_stats/Classification/DutchGovernmentBiasClassification.json +54 -0
- mteb/descriptive_stats/Classification/DutchNewsArticlesClassification.json +90 -0
- mteb/descriptive_stats/Classification/DutchSarcasticHeadlinesClassification.json +54 -0
- mteb/descriptive_stats/Classification/IconclassClassification.json +96 -0
- mteb/descriptive_stats/Classification/OpenTenderClassification.json +222 -0
- mteb/descriptive_stats/Classification/VaccinChatNLClassification.json +1068 -0
- mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringP2P.json +45 -0
- mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringS2S.json +45 -0
- mteb/descriptive_stats/Clustering/IconclassClusteringS2S.json +48 -0
- mteb/descriptive_stats/Clustering/OpenTenderClusteringP2P.json +111 -0
- mteb/descriptive_stats/Clustering/OpenTenderClusteringS2S.json +111 -0
- mteb/descriptive_stats/Clustering/VABBClusteringP2P.json +60 -0
- mteb/descriptive_stats/Clustering/VABBClusteringS2S.json +60 -0
- mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XFlickr30kCoT2IRetrieval.json +243 -153
- mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XM3600T2IRetrieval.json +999 -629
- mteb/descriptive_stats/Image/Any2AnyRetrieval/OVENIT2TRetrieval.json +33 -17
- mteb/descriptive_stats/Image/DocumentUnderstanding/MIRACLVisionRetrieval.json +574 -0
- mteb/descriptive_stats/MultilabelClassification/CovidDisinformationNLMultiLabelClassification.json +84 -0
- mteb/descriptive_stats/MultilabelClassification/VABBMultiLabelClassification.json +156 -0
- mteb/descriptive_stats/PairClassification/SICKNLPairClassification.json +35 -0
- mteb/descriptive_stats/PairClassification/XLWICNLPairClassification.json +35 -0
- mteb/descriptive_stats/Retrieval/ClimateFEVERHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/DBPediaHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/DutchNewsArticlesRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/FEVERHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/HotpotQAHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/LegalQANLRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/OpenTenderRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/QuoraRetrievalHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/RiaNewsRetrievalHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/VABBRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/VDRMultilingualRetrieval.json +184 -0
- mteb/descriptive_stats/Retrieval/bBSARDNLRetrieval.json +30 -0
- mteb/descriptive_stats/STS/SICK-NL-STS.json +28 -0
- mteb/languages/check_language_code.py +11 -3
- mteb/languages/language_scripts.py +4 -0
- mteb/leaderboard/text_segments.py +1 -1
- mteb/models/model_implementations/b1ade_models.py +1 -1
- mteb/models/model_implementations/bge_models.py +1 -3
- mteb/models/model_implementations/bmretriever_models.py +1 -1
- mteb/models/model_implementations/gme_v_models.py +2 -2
- mteb/models/model_implementations/ibm_granite_models.py +1 -1
- mteb/models/model_implementations/inf_models.py +3 -3
- mteb/models/model_implementations/jina_models.py +12 -2
- mteb/models/model_implementations/llm2vec_models.py +1 -1
- mteb/models/model_implementations/misc_models.py +2 -2
- mteb/models/model_implementations/mxbai_models.py +1 -1
- mteb/models/model_implementations/salesforce_models.py +1 -1
- mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -1
- mteb/models/model_implementations/voyage_v.py +9 -9
- mteb/results/task_result.py +6 -8
- mteb/tasks/classification/dan/angry_tweets_classification.py +2 -2
- mteb/tasks/classification/eng/legal_bench_classification.py +3 -3
- mteb/tasks/classification/mya/myanmar_news.py +2 -2
- mteb/tasks/classification/nld/__init__.py +16 -0
- mteb/tasks/classification/nld/dutch_cola_classification.py +38 -0
- mteb/tasks/classification/nld/dutch_government_bias_classification.py +37 -0
- mteb/tasks/classification/nld/dutch_news_articles_classification.py +30 -0
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +36 -0
- mteb/tasks/classification/nld/iconclass_classification.py +41 -0
- mteb/tasks/classification/nld/open_tender_classification.py +38 -0
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +46 -0
- mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
- mteb/tasks/clustering/__init__.py +1 -0
- mteb/tasks/clustering/nld/__init__.py +17 -0
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +37 -0
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +37 -0
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +47 -0
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +51 -0
- mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +41 -0
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +51 -0
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +51 -0
- mteb/tasks/multilabel_classification/__init__.py +1 -0
- mteb/tasks/multilabel_classification/nld/__init__.py +9 -0
- mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +88 -0
- mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +44 -0
- mteb/tasks/pair_classification/__init__.py +1 -0
- mteb/tasks/pair_classification/multilingual/indic_xnli_pair_classification.py +9 -8
- mteb/tasks/pair_classification/nld/__init__.py +7 -0
- mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +36 -0
- mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +41 -0
- mteb/tasks/retrieval/code/code_rag.py +8 -8
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
- mteb/tasks/retrieval/eng/__init__.py +18 -4
- mteb/tasks/retrieval/eng/climate_fever_retrieval.py +68 -77
- mteb/tasks/retrieval/eng/dbpedia_retrieval.py +55 -50
- mteb/tasks/retrieval/eng/fever_retrieval.py +62 -67
- mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +0 -4
- mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +0 -4
- mteb/tasks/retrieval/eng/hotpot_qa_retrieval.py +57 -67
- mteb/tasks/retrieval/eng/legal_summarization_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +0 -3
- mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +0 -2
- mteb/tasks/retrieval/eng/oven_it2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/quora_retrieval.py +51 -46
- mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +0 -4
- mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +0 -4
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +0 -2
- mteb/tasks/retrieval/jpn/ja_gov_faqs_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/belebele_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +0 -2
- mteb/tasks/retrieval/multilingual/miracl_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +2 -9
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +0 -2
- mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +0 -2
- mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +6 -5
- mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +3 -4
- mteb/tasks/retrieval/nld/__init__.py +10 -0
- mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +41 -0
- mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +30 -0
- mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +39 -0
- mteb/tasks/retrieval/nld/open_tender_retrieval.py +38 -0
- mteb/tasks/retrieval/nld/vabb_retrieval.py +41 -0
- mteb/tasks/retrieval/nob/norquad.py +2 -2
- mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
- mteb/tasks/retrieval/rus/__init__.py +11 -2
- mteb/tasks/retrieval/rus/ria_news_retrieval.py +48 -44
- mteb/tasks/retrieval/tur/tur_hist_quad.py +2 -2
- mteb/tasks/sts/__init__.py +1 -0
- mteb/tasks/sts/nld/__init__.py +5 -0
- mteb/tasks/sts/nld/sick_nl_sts.py +41 -0
- mteb-2.1.1.dist-info/METADATA +253 -0
- {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/RECORD +142 -95
- mteb/descriptive_stats/Classification/PersianTextTone.json +0 -56
- mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchCount.json +0 -37
- mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDepth.json +0 -25
- mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDistance.json +0 -25
- mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchRelation.json +0 -25
- mteb/descriptive_stats/Image/VisualSTS/STS12VisualSTS.json +0 -20
- mteb/descriptive_stats/Image/VisualSTS/STS13VisualSTS.json +0 -20
- mteb/descriptive_stats/Image/VisualSTS/STS14VisualSTS.json +0 -20
- mteb/descriptive_stats/Image/VisualSTS/STS15VisualSTS.json +0 -20
- mteb/descriptive_stats/Image/VisualSTS/STS16VisualSTS.json +0 -20
- mteb/descriptive_stats/Image/VisualSTS/STS17MultilingualVisualSTS.json +0 -220
- mteb/descriptive_stats/Image/VisualSTS/STSBenchmarkMultilingualVisualSTS.json +0 -402
- mteb/descriptive_stats/Reranking/InstructIR.json +0 -31
- mteb-2.0.5.dist-info/METADATA +0 -455
- {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/WHEEL +0 -0
- {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/entry_points.txt +0 -0
- {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/top_level.txt +0 -0
```diff
--- a/mteb/tasks/retrieval/eng/climate_fever_retrieval.py
+++ b/mteb/tasks/retrieval/eng/climate_fever_retrieval.py
@@ -1,30 +1,21 @@
 from mteb.abstasks.retrieval import AbsTaskRetrieval
 from mteb.abstasks.task_metadata import TaskMetadata
 
-
-class ClimateFEVER(AbsTaskRetrieval):
-    metadata = TaskMetadata(
-        name="ClimateFEVER",
-        description="CLIMATE-FEVER is a dataset adopting the FEVER methodology that consists of 1,535 real-world claims (queries) regarding climate-change. The underlying corpus is the same as FEVER.",
-        reference="https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html",
-        dataset={
-            "path": "mteb/climate-fever",
-            "revision": "47f2ac6acb640fc46020b02a5b59fdda04d39380",
-        },
-        type="Retrieval",
-        category="t2t",
-        modalities=["text"],
-        eval_splits=["test"],
-        eval_langs=["eng-Latn"],
-        main_score="ndcg_at_10",
-        date=("2001-01-01", "2020-12-31"),  # launch of wiki -> paper publication
-        domains=["Encyclopaedic", "Written"],
-        task_subtypes=["Claim verification"],
-        license="cc-by-sa-4.0",
-        annotations_creators="human-annotated",
-        dialect=[],
-        sample_creation="found",
-        bibtex_citation=r"""
+_climate_fever_metadata = dict(
+    type="Retrieval",
+    category="t2t",
+    modalities=["text"],
+    eval_splits=["test"],
+    eval_langs=["eng-Latn"],
+    main_score="ndcg_at_10",
+    date=("2001-01-01", "2020-12-31"),  # launch of wiki -> paper publication
+    domains=["Encyclopaedic", "Written"],
+    task_subtypes=["Claim verification"],
+    license="cc-by-sa-4.0",
+    annotations_creators="human-annotated",
+    dialect=[],
+    sample_creation="found",
+    bibtex_citation=r"""
 @misc{diggelmann2021climatefever,
   archiveprefix = {arXiv},
   author = {Thomas Diggelmann and Jordan Boyd-Graber and Jannis Bulian and Massimiliano Ciaramita and Markus Leippold},
@@ -34,82 +25,82 @@ class ClimateFEVER(AbsTaskRetrieval):
   year = {2021},
 }
 """,
+)
+
+
+class ClimateFEVER(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="ClimateFEVER",
+        description=(
+            "CLIMATE-FEVER is a dataset adopting the FEVER methodology that consists of 1,535 real-world claims "
+            "(queries) regarding climate-change. The underlying corpus is the same as FEVER."
+        ),
+        reference="https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html",
+        dataset={
+            "path": "mteb/climate-fever",
+            "revision": "47f2ac6acb640fc46020b02a5b59fdda04d39380",
+        },
+        prompt={
+            "query": "Given a claim about climate change, retrieve documents that support or refute the claim"
+        },
+        **_climate_fever_metadata,
+    )
+
+
+class ClimateFEVERRetrievalv2(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="ClimateFEVER.v2",
+        description=(
+            "CLIMATE-FEVER is a dataset following the FEVER methodology, containing 1,535 real-world climate change claims. "
+            "This updated version addresses corpus mismatches and qrel inconsistencies in MTEB, restoring labels while refining corpus-query alignment for better accuracy."
+        ),
+        reference="https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html",
+        dataset={
+            "path": "mteb/climate-fever-v2",
+            "revision": "e438c9586767800aeb10dbe8a245c41dbea4e5f4",
+        },
         prompt={
             "query": "Given a claim about climate change, retrieve documents that support or refute the claim"
         },
+        adapted_from=["ClimateFEVER"],
+        **_climate_fever_metadata,
     )
 
 
 class ClimateFEVERHardNegatives(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="ClimateFEVERHardNegatives",
-        description="CLIMATE-FEVER is a dataset adopting the FEVER methodology that consists of 1,535 real-world claims regarding climate-change. The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct.",
+        description=(
+            "CLIMATE-FEVER is a dataset adopting the FEVER methodology that consists of 1,535 real-world claims regarding climate-change. "
+            "The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct."
+        ),
         reference="https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html",
         dataset={
             "path": "mteb/ClimateFEVER_test_top_250_only_w_correct-v2",
             "revision": "3a309e201f3c2c4b13bd4a367a8f37eee2ec1d21",
         },
-        type="Retrieval",
-        category="t2t",
-        modalities=["text"],
-        eval_splits=["test"],
-        eval_langs=["eng-Latn"],
-        main_score="ndcg_at_10",
-        date=("2001-01-01", "2020-12-31"),  # launch of wiki -> paper publication
-        domains=["Encyclopaedic", "Written"],
-        task_subtypes=["Claim verification"],
-        license="cc-by-sa-4.0",
-        annotations_creators="human-annotated",
-        dialect=[],
-        sample_creation="found",
-        bibtex_citation=r"""
-@misc{diggelmann2021climatefever,
-  archiveprefix = {arXiv},
-  author = {Thomas Diggelmann and Jordan Boyd-Graber and Jannis Bulian and Massimiliano Ciaramita and Markus Leippold},
-  eprint = {2012.00614},
-  primaryclass = {cs.CL},
-  title = {CLIMATE-FEVER: A Dataset for Verification of Real-World Climate Claims},
-  year = {2021},
-}
-""",
         adapted_from=["ClimateFEVER"],
+        superseded_by="ClimateFEVERHardNegatives.v2",
+        **_climate_fever_metadata,
     )
 
 
-class ClimateFEVERRetrievalv2(AbsTaskRetrieval):
+class ClimateFEVERHardNegativesV2(AbsTaskRetrieval):
     metadata = TaskMetadata(
-        name="ClimateFEVER.v2",
-        description="CLIMATE-FEVER is a dataset following the FEVER methodology, containing 1,535 real-world climate change claims. This updated version addresses corpus mismatches and qrel inconsistencies in MTEB, restoring labels while refining corpus-query alignment for better accuracy.",
+        name="ClimateFEVERHardNegatives.v2",
+        description=(
+            "CLIMATE-FEVER is a dataset adopting the FEVER methodology that consists of 1,535 real-world claims regarding climate-change. "
+            "The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct. "
+            "V2 uses a more appropriate prompt rather than the default prompt for retrieval. You can get more information on the effect of different prompt in the [PR](https://github.com/embeddings-benchmark/mteb/pull/3469#issuecomment-3436467106)"
+        ),
         reference="https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html",
         dataset={
-            "path": "mteb/climate-fever-v2",
-            "revision": "e438c9586767800aeb10dbe8a245c41dbea4e5f4",
+            "path": "mteb/ClimateFEVER_test_top_250_only_w_correct-v2",
+            "revision": "3a309e201f3c2c4b13bd4a367a8f37eee2ec1d21",
         },
-        type="Retrieval",
-        category="t2t",
-        modalities=["text"],
-        eval_splits=["test"],
-        eval_langs=["eng-Latn"],
-        main_score="ndcg_at_10",
-        date=("2001-01-01", "2020-12-31"),  # launch of wiki -> paper publication
-        domains=["Academic", "Written"],
-        task_subtypes=["Claim verification"],
-        license="cc-by-sa-4.0",
-        annotations_creators="human-annotated",
-        dialect=[],
-        sample_creation="found",
-        bibtex_citation=r"""
-@misc{diggelmann2021climatefever,
-  archiveprefix = {arXiv},
-  author = {Thomas Diggelmann and Jordan Boyd-Graber and Jannis Bulian and Massimiliano Ciaramita and Markus Leippold},
-  eprint = {2012.00614},
-  primaryclass = {cs.CL},
-  title = {CLIMATE-FEVER: A Dataset for Verification of Real-World Climate Claims},
-  year = {2021},
-}
-""",
+        adapted_from=["ClimateFEVER"],
         prompt={
             "query": "Given a claim about climate change, retrieve documents that support or refute the claim"
         },
-        adapted_from=["ClimateFEVER"],
+        **_climate_fever_metadata,
     )
```
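The pattern introduced here, and repeated in the DBPedia, FEVER, and HotpotQA modules below, hoists the `TaskMetadata` fields shared by every variant of a task into a module-level dict that each class re-applies with `**` unpacking, so the v1, hard-negative, and v2 definitions can no longer drift apart. A minimal sketch of the mechanism, using a hypothetical stand-in dataclass rather than mteb's real `TaskMetadata`:

```python
from dataclasses import dataclass


@dataclass
class TaskMetadata:  # stand-in for illustration, not mteb's class
    name: str
    type: str
    main_score: str
    superseded_by: str | None = None


# Shared fields live in one module-level dict...
_shared_metadata = dict(type="Retrieval", main_score="ndcg_at_10")

# ...and every task variant re-applies them via ** unpacking.
v1 = TaskMetadata(
    name="ClimateFEVERHardNegatives",
    superseded_by="ClimateFEVERHardNegatives.v2",
    **_shared_metadata,
)
v2 = TaskMetadata(name="ClimateFEVERHardNegatives.v2", **_shared_metadata)
```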
```diff
--- a/mteb/tasks/retrieval/eng/dbpedia_retrieval.py
+++ b/mteb/tasks/retrieval/eng/dbpedia_retrieval.py
@@ -1,30 +1,21 @@
 from mteb.abstasks.retrieval import AbsTaskRetrieval
 from mteb.abstasks.task_metadata import TaskMetadata
 
-
-class DBPedia(AbsTaskRetrieval):
-    metadata = TaskMetadata(
-        name="DBPedia",
-        description="DBpedia-Entity is a standard test collection for entity search over the DBpedia knowledge base",
-        reference="https://github.com/iai-group/DBpedia-Entity/",
-        dataset={
-            "path": "mteb/dbpedia",
-            "revision": "c0f706b76e590d620bd6618b3ca8efdd34e2d659",
-        },
-        type="Retrieval",
-        category="t2t",
-        modalities=["text"],
-        eval_splits=["test"],
-        eval_langs=["eng-Latn"],
-        main_score="ndcg_at_10",
-        date=("2017-01-01", "2017-01-01"),  # best guess: based on publication date
-        domains=["Written", "Encyclopaedic"],
-        task_subtypes=[],
-        license="mit",
-        annotations_creators="derived",
-        dialect=[],
-        sample_creation="found",
-        bibtex_citation=r"""
+_dbpedia_metadata = dict(
+    type="Retrieval",
+    category="t2t",
+    modalities=["text"],
+    eval_splits=["test"],
+    eval_langs=["eng-Latn"],
+    main_score="ndcg_at_10",
+    date=("2017-01-01", "2017-01-01"),  # best guess: based on publication date
+    domains=["Written", "Encyclopaedic"],
+    task_subtypes=[],
+    license="mit",
+    annotations_creators="derived",
+    dialect=[],
+    sample_creation="found",
+    bibtex_citation=r"""
 @inproceedings{Hasibi:2017:DVT,
   author = {Hasibi, Faegheh and Nikolaev, Fedor and Xiong, Chenyan and Balog, Krisztian and Bratsberg, Svein Erik and Kotov, Alexander and Callan, Jamie},
   booktitle = {Proceedings of the 40th International ACM SIGIR Conference on Research and Development in Information Retrieval},
@@ -36,45 +27,59 @@ class DBPedia(AbsTaskRetrieval):
   year = {2017},
 }
 """,
+)
+
+
+class DBPedia(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="DBPedia",
+        description="DBpedia-Entity is a standard test collection for entity search over the DBpedia knowledge base",
+        reference="https://github.com/iai-group/DBpedia-Entity/",
+        dataset={
+            "path": "mteb/dbpedia",
+            "revision": "c0f706b76e590d620bd6618b3ca8efdd34e2d659",
+        },
         prompt={
             "query": "Given a query, retrieve relevant entity descriptions from DBPedia"
         },
+        **_dbpedia_metadata,
     )
 
 
 class DBPediaHardNegatives(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="DBPediaHardNegatives",
-        description="DBpedia-Entity is a standard test collection for entity search over the DBpedia knowledge base. The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct.",
+        description=(
+            "DBpedia-Entity is a standard test collection for entity search over the DBpedia knowledge base. "
+            "The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct."
+        ),
+        reference="https://github.com/iai-group/DBpedia-Entity/",
+        dataset={
+            "path": "mteb/DBPedia_test_top_250_only_w_correct-v2",
+            "revision": "943ec7fdfef3728b2ad1966c5b6479ff9ffd26c9",
+        },
+        superseded_by="DBPediaHardNegatives.v2",
+        adapted_from=["DBPedia"],
+        **_dbpedia_metadata,
+    )
+
+
+class DBPediaHardNegativesV2(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="DBPediaHardNegatives.v2",
+        description=(
+            "DBpedia-Entity is a standard test collection for entity search over the DBpedia knowledge base. "
+            "The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct. "
+            "V2 uses a more appropriate prompt rather than the default prompt for retrieval. You can get more information on the effect of different prompt in the [PR](https://github.com/embeddings-benchmark/mteb/pull/3469#issuecomment-3436467106)"
+        ),
         reference="https://github.com/iai-group/DBpedia-Entity/",
         dataset={
             "path": "mteb/DBPedia_test_top_250_only_w_correct-v2",
             "revision": "943ec7fdfef3728b2ad1966c5b6479ff9ffd26c9",
         },
-        type="Retrieval",
-        category="t2t",
-        modalities=["text"],
-        eval_splits=["test"],
-        eval_langs=["eng-Latn"],
-        main_score="ndcg_at_10",
-        date=("2017-01-01", "2017-01-01"),  # best guess: based on publication date
-        domains=["Written", "Encyclopaedic"],
-        task_subtypes=[],
-        license="mit",
-        annotations_creators="derived",
-        dialect=[],
-        sample_creation="found",
-        bibtex_citation=r"""
-@inproceedings{Hasibi:2017:DVT,
-  author = {Hasibi, Faegheh and Nikolaev, Fedor and Xiong, Chenyan and Balog, Krisztian and Bratsberg, Svein Erik and Kotov, Alexander and Callan, Jamie},
-  booktitle = {Proceedings of the 40th International ACM SIGIR Conference on Research and Development in Information Retrieval},
-  doi = {10.1145/3077136.3080751},
-  pages = {1265--1268},
-  publisher = {ACM},
-  series = {SIGIR '17},
-  title = {DBpedia-Entity V2: A Test Collection for Entity Search},
-  year = {2017},
-}
-""",
         adapted_from=["DBPedia"],
+        prompt={
+            "query": "Given a query, retrieve relevant entity descriptions from DBPedia"
+        },
+        **_dbpedia_metadata,
     )
```
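Same refactor for the DBPedia family, plus the new `DBPediaHardNegativesV2` task. A general Python note on this style (not something the diff itself changes): a variant that needs to override one shared field cannot pass it both explicitly and through the unpacked dict, because duplicate keyword arguments raise a `TypeError`; the override has to be merged into a copy of the dict first. A hypothetical sketch:

```python
_shared = dict(license="mit", main_score="ndcg_at_10")


def metadata(name: str, license: str, main_score: str) -> dict:
    # Hypothetical constructor standing in for TaskMetadata.
    return {"name": name, "license": license, "main_score": main_score}


print(metadata("DBPedia", **_shared))  # shared fields apply unchanged

# metadata("X", license="cc0-1.0", **_shared) would raise:
#   TypeError: metadata() got multiple values for keyword argument 'license'
overridden = {**_shared, "license": "cc0-1.0"}  # later key wins
print(metadata("SomeVariant", **overridden))
```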
```diff
--- a/mteb/tasks/retrieval/eng/fever_retrieval.py
+++ b/mteb/tasks/retrieval/eng/fever_retrieval.py
@@ -1,36 +1,22 @@
 from mteb.abstasks.retrieval import AbsTaskRetrieval
 from mteb.abstasks.task_metadata import TaskMetadata
 
-
-class FEVER(AbsTaskRetrieval):
-    ignore_identical_ids = True
-
-    metadata = TaskMetadata(
-        name="FEVER",
-        dataset={
-            "path": "mteb/fever",
-            "revision": "bea83ef9e8fb933d90a2f1d5515737465d613e12",
-        },
-        description=(
-            "FEVER (Fact Extraction and VERification) consists of 185,445 claims generated by altering sentences "
-            "extracted from Wikipedia and subsequently verified without knowledge of the sentence they were "
-            "derived from."
-        ),
-        reference="https://fever.ai/",
-        type="Retrieval",
-        category="t2t",
-        modalities=["text"],
-        eval_splits=["test"],
-        eval_langs=["eng-Latn"],
-        main_score="ndcg_at_10",
-        date=None,
-        domains=["Encyclopaedic", "Written"],
-        task_subtypes=["Claim verification"],
-        license="cc-by-nc-sa-3.0",
-        annotations_creators="human-annotated",
-        dialect=[],
-        sample_creation="found",
-        bibtex_citation=r"""
+_fever_metadata = dict(
+    reference="https://fever.ai/",
+    type="Retrieval",
+    category="t2t",
+    modalities=["text"],
+    eval_splits=["test"],
+    eval_langs=["eng-Latn"],
+    main_score="ndcg_at_10",
+    date=None,
+    domains=["Encyclopaedic", "Written"],
+    task_subtypes=["Claim verification"],
+    license="cc-by-nc-sa-3.0",
+    annotations_creators="human-annotated",
+    dialect=[],
+    sample_creation="found",
+    bibtex_citation=r"""
 @inproceedings{thorne-etal-2018-fever,
   address = {New Orleans, Louisiana},
   author = {Thorne, James and
@@ -50,9 +36,27 @@ Stent, Amanda},
   year = {2018},
 }
 """,
+)
+
+
+class FEVER(AbsTaskRetrieval):
+    ignore_identical_ids = True
+
+    metadata = TaskMetadata(
+        name="FEVER",
+        dataset={
+            "path": "mteb/fever",
+            "revision": "bea83ef9e8fb933d90a2f1d5515737465d613e12",
+        },
+        description=(
+            "FEVER (Fact Extraction and VERification) consists of 185,445 claims generated by altering sentences "
+            "extracted from Wikipedia and subsequently verified without knowledge of the sentence they were "
+            "derived from."
+        ),
         prompt={
             "query": "Given a claim, retrieve documents that support or refute the claim"
         },
+        **_fever_metadata,
     )
 
 
@@ -66,43 +70,34 @@ class FEVERHardNegatives(AbsTaskRetrieval):
             "revision": "080c9ed6267b65029207906e815d44a9240bafca",
         },
         description=(
-            "FEVER (Fact Extraction and VERification) consists of 185,445 claims generated by altering sentences"
-            "extracted from Wikipedia and subsequently verified without knowledge of the sentence they were"
-            "derived from. The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct."
+            "FEVER (Fact Extraction and VERification) consists of 185,445 claims generated by altering sentences "
+            "extracted from Wikipedia and subsequently verified without knowledge of the sentence they were "
+            "derived from. The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct."
+        ),
+        adapted_from=["FEVER"],
+        superseded_by="FEVERHardNegatives.v2",
+        **_fever_metadata,
+    )
+
+
+class FEVERHardNegativesV2(AbsTaskRetrieval):
+    ignore_identical_ids = True
+
+    metadata = TaskMetadata(
+        name="FEVERHardNegatives.v2",
+        dataset={
+            "path": "mteb/FEVER_test_top_250_only_w_correct-v2",
+            "revision": "080c9ed6267b65029207906e815d44a9240bafca",
+        },
+        description=(
+            "FEVER (Fact Extraction and VERification) consists of 185,445 claims generated by altering sentences "
+            "extracted from Wikipedia and subsequently verified without knowledge of the sentence they were "
+            "derived from. The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct. "
+            "V2 uses a more appropriate prompt rather than the default prompt for retrieval. You can get more information on the effect of different prompt in the [PR](https://github.com/embeddings-benchmark/mteb/pull/3469#issuecomment-3436467106)"
         ),
-        reference="https://fever.ai/",
-        type="Retrieval",
-        category="t2t",
-        modalities=["text"],
-        eval_splits=["test"],
-        eval_langs=["eng-Latn"],
-        main_score="ndcg_at_10",
-        date=None,
-        domains=["Encyclopaedic", "Written"],
-        task_subtypes=["Claim verification"],
-        license="cc-by-nc-sa-3.0",
-        annotations_creators="human-annotated",
-        dialect=None,
-        sample_creation=None,
-        bibtex_citation=r"""
-@inproceedings{thorne-etal-2018-fever,
-  address = {New Orleans, Louisiana},
-  author = {Thorne, James and
-Vlachos, Andreas and
-Christodoulopoulos, Christos and
-Mittal, Arpit},
-  booktitle = {Proceedings of the 2018 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)},
-  doi = {10.18653/v1/N18-1074},
-  editor = {Walker, Marilyn and
-Ji, Heng and
-Stent, Amanda},
-  month = jun,
-  pages = {809--819},
-  publisher = {Association for Computational Linguistics},
-  title = {{FEVER}: a Large-scale Dataset for Fact Extraction and {VER}ification},
-  url = {https://aclanthology.org/N18-1074},
-  year = {2018},
-}
-""",
         adapted_from=["FEVER"],
+        prompt={
+            "query": "Given a claim, retrieve documents that support or refute the claim"
+        },
+        **_fever_metadata,
     )
```
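Beyond the refactor, the FEVERHardNegatives hunk fixes a real bug in the description: Python concatenates adjacent string literals with nothing in between, so the old lines without trailing spaces fused words across line breaks. A standalone illustration of the failure mode:

```python
broken = (
    "claims generated by altering sentences"  # no trailing space
    "extracted from Wikipedia"
)
fixed = (
    "claims generated by altering sentences "  # trailing space, as in the fix
    "extracted from Wikipedia"
)

assert "sentencesextracted" in broken
assert "sentences extracted" in fixed
```

The same trailing-space fix lands in the ClimateFEVER and DBPedia descriptions, though the HotpotQAHardNegatives.v2 description below still joins "e5-mistral-instruct." and "V2 uses" without a space.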
```diff
--- a/mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py
+++ b/mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py
@@ -24,9 +24,7 @@ def _load_data(path: str, splits: str, revision: str | None = None):
     shared_corpus = shared_corpus.map(
         lambda x: {
             "id": "corpus-" + str(x["id"]),
-            # "text": x["text"],
             "modality": "text",
-            "image": None,
         },
         remove_columns=[
             "split",
@@ -40,9 +38,7 @@ def _load_data(path: str, splits: str, revision: str | None = None):
     queries[split] = split_dataset.map(
         lambda x: {
             "id": "query-" + str(x["id"]),
-            "text": None,
             "modality": "image",
-            # "image": x["image"],
         },
         remove_columns=[
             "split",
```
```diff
--- a/mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py
+++ b/mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py
@@ -24,9 +24,7 @@ def _load_data(path: str, splits: str, revision: str | None = None):
     shared_corpus = shared_corpus.map(
         lambda x: {
             "id": "corpus-" + str(x["id"]),
-            "text": None,
             "modality": "image",
-            # "image": None,
         },
         remove_columns=[
             "split",
@@ -40,9 +38,7 @@ def _load_data(path: str, splits: str, revision: str | None = None):
     queries[split] = split_dataset.map(
         lambda x: {
             "id": "query-" + str(x["id"]),
-            # "text": None,
             "modality": "text",
-            "image": None,
         },
         remove_columns=[
             "split",
```
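In both hateful-memes loaders the `map` calls stop writing placeholder keys (`"text": None` / `"image": None`) and drop the commented-out lines. Since `Dataset.map` merges the returned dict into each row, omitting a key simply means no all-`None` column gets created. A minimal sketch of the same pattern, assuming the Hugging Face `datasets` library:

```python
from datasets import Dataset

corpus = Dataset.from_dict(
    {"id": [1, 2], "text": ["a", "b"], "split": ["test", "test"]}
)
corpus = corpus.map(
    # Returned keys are merged into each row: "id" is overwritten,
    # "modality" is added, and no placeholder "image" column appears.
    lambda x: {"id": "corpus-" + str(x["id"]), "modality": "text"},
    remove_columns=["split"],
)
print(corpus[0])  # {'id': 'corpus-1', 'text': 'a', 'modality': 'text'}
```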
```diff
--- a/mteb/tasks/retrieval/eng/hotpot_qa_retrieval.py
+++ b/mteb/tasks/retrieval/eng/hotpot_qa_retrieval.py
@@ -1,33 +1,22 @@
 from mteb.abstasks.retrieval import AbsTaskRetrieval
 from mteb.abstasks.task_metadata import TaskMetadata
 
-
-class HotpotQA(AbsTaskRetrieval):
-    metadata = TaskMetadata(
-        name="HotpotQA",
-        dataset={
-            "path": "mteb/hotpotqa",
-            "revision": "ab518f4d6fcca38d87c25209f94beba119d02014",
-        },
-        description=(
-            "HotpotQA is a question answering dataset featuring natural, multi-hop questions, with strong "
-            "supervision for supporting facts to enable more explainable question answering systems."
-        ),
-        reference="https://hotpotqa.github.io/",
-        type="Retrieval",
-        category="t2t",
-        modalities=["text"],
-        eval_splits=["test"],
-        eval_langs=["eng-Latn"],
-        main_score="ndcg_at_10",
-        date=("2018-01-01", "2018-12-31"),  # best guess: based on publication date
-        domains=["Web", "Written"],
-        task_subtypes=["Question answering"],
-        license="cc-by-sa-4.0",
-        annotations_creators="human-annotated",
-        dialect=[],
-        sample_creation="found",
-        bibtex_citation=r"""
+_hotpot_qa_metadata = dict(
+    reference="https://hotpotqa.github.io/",
+    type="Retrieval",
+    category="t2t",
+    modalities=["text"],
+    eval_splits=["test"],
+    eval_langs=["eng-Latn"],
+    main_score="ndcg_at_10",
+    date=("2018-01-01", "2018-12-31"),  # best guess: based on publication date
+    domains=["Web", "Written"],
+    task_subtypes=["Question answering"],
+    license="cc-by-sa-4.0",
+    annotations_creators="human-annotated",
+    dialect=[],
+    sample_creation="found",
+    bibtex_citation=r"""
 @inproceedings{yang-etal-2018-hotpotqa,
   address = {Brussels, Belgium},
   author = {Yang, Zhilin and
@@ -51,9 +40,24 @@ Tsujii, Jun{'}ichi},
   year = {2018},
 }
 """,
+)
+
+
+class HotpotQA(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="HotpotQA",
+        dataset={
+            "path": "mteb/hotpotqa",
+            "revision": "ab518f4d6fcca38d87c25209f94beba119d02014",
+        },
+        description=(
+            "HotpotQA is a question answering dataset featuring natural, multi-hop questions, with strong "
+            "supervision for supporting facts to enable more explainable question answering systems."
+        ),
         prompt={
             "query": "Given a multi-hop question, retrieve documents that can help answer the question"
         },
+        **_hotpot_qa_metadata,
     )
 
 
@@ -65,46 +69,32 @@ class HotpotQAHardNegatives(AbsTaskRetrieval):
             "revision": "617612fa63afcb60e3b134bed8b7216a99707c37",
         },
         description=(
-            "HotpotQA is a question answering dataset featuring natural, multi-hop questions, with strong"
-            "supervision for supporting facts to enable more explainable question answering systems. The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct."
+            "HotpotQA is a question answering dataset featuring natural, multi-hop questions, with strong "
+            "supervision for supporting facts to enable more explainable question answering systems. "
+            "The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct."
         ),
-        reference="https://hotpotqa.github.io/",
-        type="Retrieval",
-        category="t2t",
-        modalities=["text"],
-        eval_splits=["test"],
-        eval_langs=["eng-Latn"],
-        main_score="ndcg_at_10",
-        date=("2018-01-01", "2018-12-31"),  # best guess: based on publication date
-        domains=["Web", "Written"],
-        task_subtypes=["Question answering"],
-        license="cc-by-sa-4.0",
-        annotations_creators="human-annotated",
-        dialect=[],
-        sample_creation="found",
-        bibtex_citation=r"""
-@inproceedings{yang-etal-2018-hotpotqa,
-  address = {Brussels, Belgium},
-  author = {Yang, Zhilin and
-Qi, Peng and
-Zhang, Saizheng and
-Bengio, Yoshua and
-Cohen, William and
-Salakhutdinov, Ruslan and
-Manning, Christopher D.},
-  booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
-  doi = {10.18653/v1/D18-1259},
-  editor = {Riloff, Ellen and
-Chiang, David and
-Hockenmaier, Julia and
-Tsujii, Jun{'}ichi},
-  month = oct # {-} # nov,
-  pages = {2369--2380},
-  publisher = {Association for Computational Linguistics},
-  title = {{H}otpot{QA}: A Dataset for Diverse, Explainable Multi-hop Question Answering},
-  url = {https://aclanthology.org/D18-1259},
-  year = {2018},
-}
-""",
         adapted_from=["HotpotQA"],
+        superseded_by="HotpotQAHardNegatives.v2",
+        **_hotpot_qa_metadata,
+    )
+
+
+class HotpotQAHardNegativesV2(AbsTaskRetrieval):
+    metadata = TaskMetadata(
+        name="HotpotQAHardNegatives.v2",
+        dataset={
+            "path": "mteb/HotpotQA_test_top_250_only_w_correct-v2",
+            "revision": "617612fa63afcb60e3b134bed8b7216a99707c37",
+        },
+        description=(
+            "HotpotQA is a question answering dataset featuring natural, multi-hop questions, with strong "
+            "supervision for supporting facts to enable more explainable question answering systems. "
+            "The hard negative version has been created by pooling the 250 top documents per query from BM25, e5-multilingual-large and e5-mistral-instruct."
+            "V2 uses a more appropriate prompt rather than the default prompt for retrieval. You can get more information on the effect of different prompt in the [PR](https://github.com/embeddings-benchmark/mteb/pull/3469#issuecomment-3436467106)"
+        ),
+        adapted_from=["HotpotQA"],
+        prompt={
+            "query": "Given a multi-hop question, retrieve documents that can help answer the question"
+        },
+        **_hotpot_qa_metadata,
     )
```
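With the hard-negative tasks now marked `superseded_by`, the `.v2` names introduced above are the ones to request going forward; the old names remain registered. A hedged usage sketch, assuming mteb's `get_task` lookup API:

```python
import mteb

# Look up one of the new v2 tasks by the name declared in its TaskMetadata.
task = mteb.get_task("HotpotQAHardNegatives.v2")
print(task.metadata.name)        # HotpotQAHardNegatives.v2
print(task.metadata.main_score)  # ndcg_at_10
```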
```diff
--- a/mteb/tasks/retrieval/eng/legal_summarization_retrieval.py
+++ b/mteb/tasks/retrieval/eng/legal_summarization_retrieval.py
@@ -5,7 +5,7 @@ from mteb.abstasks.task_metadata import TaskMetadata
 class LegalSummarization(AbsTaskRetrieval):
     metadata = TaskMetadata(
         name="LegalSummarization",
-        description="The dataset consistes of 439 pairs of contracts and their summarizations from https://tldrlegal.com and https://tosdr.org/.",
+        description="The dataset consists of 439 pairs of contracts and their summarizations from https://tldrlegal.com and https://tosdr.org/.",
         reference="https://github.com/lauramanor/legal_summarization",
         dataset={
             "path": "mteb/legal_summarization",
```