mteb 2.0.5__py3-none-any.whl → 2.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/__init__.py +10 -1
- mteb/_create_dataloaders.py +2 -0
- mteb/abstasks/_stratification.py +1 -1
- mteb/abstasks/abstask.py +6 -1
- mteb/abstasks/dataset_card_template.md +1 -1
- mteb/abstasks/retrieval.py +2 -1
- mteb/abstasks/retrieval_dataset_loaders.py +1 -1
- mteb/abstasks/task_metadata.py +1 -1
- mteb/benchmarks/benchmarks/__init__.py +2 -0
- mteb/benchmarks/benchmarks/benchmarks.py +82 -11
- mteb/benchmarks/get_benchmark.py +1 -1
- mteb/descriptive_stats/Classification/DutchColaClassification.json +54 -0
- mteb/descriptive_stats/Classification/DutchGovernmentBiasClassification.json +54 -0
- mteb/descriptive_stats/Classification/DutchNewsArticlesClassification.json +90 -0
- mteb/descriptive_stats/Classification/DutchSarcasticHeadlinesClassification.json +54 -0
- mteb/descriptive_stats/Classification/IconclassClassification.json +96 -0
- mteb/descriptive_stats/Classification/OpenTenderClassification.json +222 -0
- mteb/descriptive_stats/Classification/VaccinChatNLClassification.json +1068 -0
- mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringP2P.json +45 -0
- mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringS2S.json +45 -0
- mteb/descriptive_stats/Clustering/IconclassClusteringS2S.json +48 -0
- mteb/descriptive_stats/Clustering/OpenTenderClusteringP2P.json +111 -0
- mteb/descriptive_stats/Clustering/OpenTenderClusteringS2S.json +111 -0
- mteb/descriptive_stats/Clustering/VABBClusteringP2P.json +60 -0
- mteb/descriptive_stats/Clustering/VABBClusteringS2S.json +60 -0
- mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XFlickr30kCoT2IRetrieval.json +243 -153
- mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XM3600T2IRetrieval.json +999 -629
- mteb/descriptive_stats/Image/Any2AnyRetrieval/OVENIT2TRetrieval.json +33 -17
- mteb/descriptive_stats/Image/DocumentUnderstanding/MIRACLVisionRetrieval.json +574 -0
- mteb/descriptive_stats/MultilabelClassification/CovidDisinformationNLMultiLabelClassification.json +84 -0
- mteb/descriptive_stats/MultilabelClassification/VABBMultiLabelClassification.json +156 -0
- mteb/descriptive_stats/PairClassification/SICKNLPairClassification.json +35 -0
- mteb/descriptive_stats/PairClassification/XLWICNLPairClassification.json +35 -0
- mteb/descriptive_stats/Retrieval/ClimateFEVERHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/DBPediaHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/DutchNewsArticlesRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/FEVERHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/HotpotQAHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/LegalQANLRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/OpenTenderRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/QuoraRetrievalHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/RiaNewsRetrievalHardNegatives.v2.json +30 -0
- mteb/descriptive_stats/Retrieval/VABBRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/VDRMultilingualRetrieval.json +184 -0
- mteb/descriptive_stats/Retrieval/bBSARDNLRetrieval.json +30 -0
- mteb/descriptive_stats/STS/SICK-NL-STS.json +28 -0
- mteb/languages/check_language_code.py +11 -3
- mteb/languages/language_scripts.py +4 -0
- mteb/leaderboard/text_segments.py +1 -1
- mteb/models/model_implementations/b1ade_models.py +1 -1
- mteb/models/model_implementations/bge_models.py +1 -3
- mteb/models/model_implementations/bmretriever_models.py +1 -1
- mteb/models/model_implementations/gme_v_models.py +2 -2
- mteb/models/model_implementations/ibm_granite_models.py +1 -1
- mteb/models/model_implementations/inf_models.py +3 -3
- mteb/models/model_implementations/jina_models.py +12 -2
- mteb/models/model_implementations/llm2vec_models.py +1 -1
- mteb/models/model_implementations/misc_models.py +2 -2
- mteb/models/model_implementations/mxbai_models.py +1 -1
- mteb/models/model_implementations/salesforce_models.py +1 -1
- mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -1
- mteb/models/model_implementations/voyage_v.py +9 -9
- mteb/results/task_result.py +6 -8
- mteb/tasks/classification/dan/angry_tweets_classification.py +2 -2
- mteb/tasks/classification/eng/legal_bench_classification.py +3 -3
- mteb/tasks/classification/mya/myanmar_news.py +2 -2
- mteb/tasks/classification/nld/__init__.py +16 -0
- mteb/tasks/classification/nld/dutch_cola_classification.py +38 -0
- mteb/tasks/classification/nld/dutch_government_bias_classification.py +37 -0
- mteb/tasks/classification/nld/dutch_news_articles_classification.py +30 -0
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +36 -0
- mteb/tasks/classification/nld/iconclass_classification.py +41 -0
- mteb/tasks/classification/nld/open_tender_classification.py +38 -0
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +46 -0
- mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
- mteb/tasks/clustering/__init__.py +1 -0
- mteb/tasks/clustering/nld/__init__.py +17 -0
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +37 -0
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +37 -0
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +47 -0
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +51 -0
- mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +41 -0
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +51 -0
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +51 -0
- mteb/tasks/multilabel_classification/__init__.py +1 -0
- mteb/tasks/multilabel_classification/nld/__init__.py +9 -0
- mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +88 -0
- mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +44 -0
- mteb/tasks/pair_classification/__init__.py +1 -0
- mteb/tasks/pair_classification/multilingual/indic_xnli_pair_classification.py +9 -8
- mteb/tasks/pair_classification/nld/__init__.py +7 -0
- mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +36 -0
- mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +41 -0
- mteb/tasks/retrieval/code/code_rag.py +8 -8
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
- mteb/tasks/retrieval/eng/__init__.py +18 -4
- mteb/tasks/retrieval/eng/climate_fever_retrieval.py +68 -77
- mteb/tasks/retrieval/eng/dbpedia_retrieval.py +55 -50
- mteb/tasks/retrieval/eng/fever_retrieval.py +62 -67
- mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +0 -4
- mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +0 -4
- mteb/tasks/retrieval/eng/hotpot_qa_retrieval.py +57 -67
- mteb/tasks/retrieval/eng/legal_summarization_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +0 -3
- mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +0 -2
- mteb/tasks/retrieval/eng/oven_it2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/quora_retrieval.py +51 -46
- mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +0 -4
- mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +0 -4
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +0 -2
- mteb/tasks/retrieval/jpn/ja_gov_faqs_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/belebele_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +0 -2
- mteb/tasks/retrieval/multilingual/miracl_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +2 -9
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +0 -2
- mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +0 -2
- mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +6 -5
- mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +3 -4
- mteb/tasks/retrieval/nld/__init__.py +10 -0
- mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +41 -0
- mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +30 -0
- mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +39 -0
- mteb/tasks/retrieval/nld/open_tender_retrieval.py +38 -0
- mteb/tasks/retrieval/nld/vabb_retrieval.py +41 -0
- mteb/tasks/retrieval/nob/norquad.py +2 -2
- mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
- mteb/tasks/retrieval/rus/__init__.py +11 -2
- mteb/tasks/retrieval/rus/ria_news_retrieval.py +48 -44
- mteb/tasks/retrieval/tur/tur_hist_quad.py +2 -2
- mteb/tasks/sts/__init__.py +1 -0
- mteb/tasks/sts/nld/__init__.py +5 -0
- mteb/tasks/sts/nld/sick_nl_sts.py +41 -0
- mteb-2.1.1.dist-info/METADATA +253 -0
- {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/RECORD +142 -95
- mteb/descriptive_stats/Classification/PersianTextTone.json +0 -56
- mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchCount.json +0 -37
- mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDepth.json +0 -25
- mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDistance.json +0 -25
- mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchRelation.json +0 -25
- mteb/descriptive_stats/Image/VisualSTS/STS12VisualSTS.json +0 -20
- mteb/descriptive_stats/Image/VisualSTS/STS13VisualSTS.json +0 -20
- mteb/descriptive_stats/Image/VisualSTS/STS14VisualSTS.json +0 -20
- mteb/descriptive_stats/Image/VisualSTS/STS15VisualSTS.json +0 -20
- mteb/descriptive_stats/Image/VisualSTS/STS16VisualSTS.json +0 -20
- mteb/descriptive_stats/Image/VisualSTS/STS17MultilingualVisualSTS.json +0 -220
- mteb/descriptive_stats/Image/VisualSTS/STSBenchmarkMultilingualVisualSTS.json +0 -402
- mteb/descriptive_stats/Reranking/InstructIR.json +0 -31
- mteb-2.0.5.dist-info/METADATA +0 -455
- {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/WHEEL +0 -0
- {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/entry_points.txt +0 -0
- {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
from mteb.abstasks.clustering import AbsTaskClustering
|
|
2
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class VABBClusteringS2S(AbsTaskClustering):
|
|
6
|
+
max_fraction_of_documents_to_embed = 1.0
|
|
7
|
+
metadata = TaskMetadata(
|
|
8
|
+
name="VABBClusteringS2S",
|
|
9
|
+
dataset={
|
|
10
|
+
"path": "clips/mteb-nl-vabb-cls",
|
|
11
|
+
"revision": "544acc2e46909eab2b49962b043a18b9c9772770",
|
|
12
|
+
},
|
|
13
|
+
description="This dataset contains the fourteenth edition of the Flemish Academic Bibliography for the Social "
|
|
14
|
+
"Sciences and Humanities (VABB-SHW), a database of academic publications from the social sciences "
|
|
15
|
+
"and humanities authored by researchers affiliated to Flemish universities (more information). "
|
|
16
|
+
"Publications in the database are used as one of the parameters of the Flemish performance-based "
|
|
17
|
+
"research funding system",
|
|
18
|
+
reference="https://zenodo.org/records/14214806",
|
|
19
|
+
type="Clustering",
|
|
20
|
+
category="t2c",
|
|
21
|
+
modalities=["text"],
|
|
22
|
+
eval_splits=["test"],
|
|
23
|
+
eval_langs=["nld-Latn"],
|
|
24
|
+
main_score="v_measure",
|
|
25
|
+
date=("2009-11-01", "2010-01-01"),
|
|
26
|
+
domains=["Academic", "Written"],
|
|
27
|
+
task_subtypes=[],
|
|
28
|
+
license="cc-by-nc-sa-4.0",
|
|
29
|
+
annotations_creators="derived",
|
|
30
|
+
dialect=[],
|
|
31
|
+
sample_creation="found",
|
|
32
|
+
bibtex_citation=r"""
|
|
33
|
+
@dataset{aspeslagh2024vabb,
|
|
34
|
+
author = {Aspeslagh, Pieter and Guns, Raf and Engels, Tim C. E.},
|
|
35
|
+
doi = {10.5281/zenodo.14214806},
|
|
36
|
+
publisher = {Zenodo},
|
|
37
|
+
title = {VABB-SHW: Dataset of Flemish Academic Bibliography for the Social Sciences and Humanities (edition 14)},
|
|
38
|
+
url = {https://doi.org/10.5281/zenodo.14214806},
|
|
39
|
+
year = {2024},
|
|
40
|
+
}
|
|
41
|
+
""",
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
def dataset_transform(self):
|
|
45
|
+
for split in self.dataset:
|
|
46
|
+
self.dataset[split] = self.dataset[split].rename_columns(
|
|
47
|
+
{"title": "sentences"}
|
|
48
|
+
)
|
|
49
|
+
self.dataset[split] = self.dataset[split].map(
|
|
50
|
+
lambda ex: {"labels": ex["org_discipline"]}
|
|
51
|
+
)
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
from .covid_disinformation_nl_multi_label_classification import (
|
|
2
|
+
CovidDisinformationNLMultiLabelClassification,
|
|
3
|
+
)
|
|
4
|
+
from .vabb_multi_label_classification import VABBMultiLabelClassification
|
|
5
|
+
|
|
6
|
+
__all__ = [
|
|
7
|
+
"CovidDisinformationNLMultiLabelClassification",
|
|
8
|
+
"VABBMultiLabelClassification",
|
|
9
|
+
]
|
mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
from mteb.abstasks.multilabel_classification import (
|
|
2
|
+
AbsTaskMultilabelClassification,
|
|
3
|
+
)
|
|
4
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class CovidDisinformationNLMultiLabelClassification(AbsTaskMultilabelClassification):
|
|
8
|
+
metadata = TaskMetadata(
|
|
9
|
+
name="CovidDisinformationNLMultiLabelClassification",
|
|
10
|
+
dataset={
|
|
11
|
+
"path": "clips/mteb-nl-COVID-19-disinformation",
|
|
12
|
+
"revision": "7ad922bdef875db1f530847c6ffff05fc154f2e8",
|
|
13
|
+
},
|
|
14
|
+
description="The dataset is curated to address questions of interest to journalists, fact-checkers, "
|
|
15
|
+
"social media platforms, policymakers, and the general public.",
|
|
16
|
+
reference="https://aclanthology.org/2021.findings-emnlp.56.pdf",
|
|
17
|
+
type="MultilabelClassification",
|
|
18
|
+
category="t2c",
|
|
19
|
+
modalities=["text"],
|
|
20
|
+
eval_splits=["test"],
|
|
21
|
+
eval_langs=["nld-Latn"],
|
|
22
|
+
main_score="f1",
|
|
23
|
+
date=("2020-01-01", "2021-04-01"),
|
|
24
|
+
domains=["Web", "Social", "Written"],
|
|
25
|
+
task_subtypes=[],
|
|
26
|
+
license="cc-by-4.0",
|
|
27
|
+
annotations_creators="human-annotated",
|
|
28
|
+
dialect=[],
|
|
29
|
+
sample_creation="found",
|
|
30
|
+
bibtex_citation=r"""
|
|
31
|
+
@inproceedings{alam-etal-2021-fighting-covid,
|
|
32
|
+
address = {Punta Cana, Dominican Republic},
|
|
33
|
+
author = {Alam, Firoj and
|
|
34
|
+
Shaar, Shaden and
|
|
35
|
+
Dalvi, Fahim and
|
|
36
|
+
Sajjad, Hassan and
|
|
37
|
+
Nikolov, Alex and
|
|
38
|
+
Mubarak, Hamdy and
|
|
39
|
+
Da San Martino, Giovanni and
|
|
40
|
+
Abdelali, Ahmed and
|
|
41
|
+
Durrani, Nadir and
|
|
42
|
+
Darwish, Kareem and
|
|
43
|
+
Al-Homaid, Abdulaziz and
|
|
44
|
+
Zaghouani, Wajdi and
|
|
45
|
+
Caselli, Tommaso and
|
|
46
|
+
Danoe, Gijs and
|
|
47
|
+
Stolk, Friso and
|
|
48
|
+
Bruntink, Britt and
|
|
49
|
+
Nakov, Preslav},
|
|
50
|
+
booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2021},
|
|
51
|
+
doi = {10.18653/v1/2021.findings-emnlp.56},
|
|
52
|
+
editor = {Moens, Marie-Francine and
|
|
53
|
+
Huang, Xuanjing and
|
|
54
|
+
Specia, Lucia and
|
|
55
|
+
Yih, Scott Wen-tau},
|
|
56
|
+
month = nov,
|
|
57
|
+
pages = {611--649},
|
|
58
|
+
publisher = {Association for Computational Linguistics},
|
|
59
|
+
title = {Fighting the {COVID}-19 Infodemic: Modeling the Perspective of Journalists, Fact-Checkers, Social Media Platforms, Policy Makers, and the Society},
|
|
60
|
+
url = {https://aclanthology.org/2021.findings-emnlp.56/},
|
|
61
|
+
year = {2021},
|
|
62
|
+
}
|
|
63
|
+
""",
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
def dataset_transform(self) -> None:
|
|
67
|
+
labels = [
|
|
68
|
+
"q2_label",
|
|
69
|
+
"q3_label",
|
|
70
|
+
"q4_label",
|
|
71
|
+
"q5_label",
|
|
72
|
+
"q6_label",
|
|
73
|
+
"q7_label",
|
|
74
|
+
]
|
|
75
|
+
_dataset = {}
|
|
76
|
+
|
|
77
|
+
def map_labels(example):
|
|
78
|
+
ml_labels = []
|
|
79
|
+
for i, label in enumerate(labels):
|
|
80
|
+
if example[label] == "yes":
|
|
81
|
+
ml_labels.append(i)
|
|
82
|
+
return {"label": ml_labels}
|
|
83
|
+
|
|
84
|
+
for split in self.dataset:
|
|
85
|
+
self.dataset[split] = self.dataset[split].filter(
|
|
86
|
+
lambda ex: ex["q1_label"] == "yes"
|
|
87
|
+
)
|
|
88
|
+
self.dataset[split] = self.dataset[split].map(map_labels)
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
from mteb.abstasks.multilabel_classification import (
|
|
2
|
+
AbsTaskMultilabelClassification,
|
|
3
|
+
)
|
|
4
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class VABBMultiLabelClassification(AbsTaskMultilabelClassification):
|
|
8
|
+
samples_per_label = 128
|
|
9
|
+
metadata = TaskMetadata(
|
|
10
|
+
name="VABBMultiLabelClassification",
|
|
11
|
+
dataset={
|
|
12
|
+
"path": "clips/mteb-nl-vabb-mlcls-pr",
|
|
13
|
+
"revision": "584c70f5104671772119f21e9f8a3c912ac07d4a",
|
|
14
|
+
},
|
|
15
|
+
description="This dataset contains the fourteenth edition of the Flemish Academic Bibliography for the Social "
|
|
16
|
+
"Sciences and Humanities (VABB-SHW), a database of academic publications from the social sciences "
|
|
17
|
+
"and humanities authored by researchers affiliated to Flemish universities (more information). "
|
|
18
|
+
"Publications in the database are used as one of the parameters of the Flemish performance-based "
|
|
19
|
+
"research funding system",
|
|
20
|
+
reference="https://zenodo.org/records/14214806",
|
|
21
|
+
type="MultilabelClassification",
|
|
22
|
+
category="t2c",
|
|
23
|
+
modalities=["text"],
|
|
24
|
+
eval_splits=["test"],
|
|
25
|
+
eval_langs=["nld-Latn"],
|
|
26
|
+
main_score="f1",
|
|
27
|
+
date=("2020-01-01", "2021-04-01"),
|
|
28
|
+
domains=["Academic", "Written"],
|
|
29
|
+
task_subtypes=[],
|
|
30
|
+
license="cc-by-4.0",
|
|
31
|
+
annotations_creators="human-annotated",
|
|
32
|
+
dialect=[],
|
|
33
|
+
sample_creation="found",
|
|
34
|
+
bibtex_citation=r"""
|
|
35
|
+
@dataset{aspeslagh2024vabb,
|
|
36
|
+
author = {Aspeslagh, Pieter and Guns, Raf and Engels, Tim C. E.},
|
|
37
|
+
doi = {10.5281/zenodo.14214806},
|
|
38
|
+
publisher = {Zenodo},
|
|
39
|
+
title = {VABB-SHW: Dataset of Flemish Academic Bibliography for the Social Sciences and Humanities (edition 14)},
|
|
40
|
+
url = {https://doi.org/10.5281/zenodo.14214806},
|
|
41
|
+
year = {2024},
|
|
42
|
+
}
|
|
43
|
+
""",
|
|
44
|
+
)
|
|
@@ -23,14 +23,15 @@ class IndicXnliPairClassification(AbsTaskPairClassification):
|
|
|
23
23
|
"path": "mteb/IndicXnliPairClassification",
|
|
24
24
|
"revision": "027e97b9afe84ea3447b57b7705b8864bb2b3a83",
|
|
25
25
|
},
|
|
26
|
-
description=
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
26
|
+
description=(
|
|
27
|
+
"INDICXNLI is similar to existing XNLI dataset in shape/form, but "
|
|
28
|
+
"focuses on Indic language family. "
|
|
29
|
+
"The train (392,702), validation (2,490), and evaluation sets (5,010) of English "
|
|
30
|
+
"XNLI were translated from English into each of the eleven Indic languages. IndicTrans "
|
|
31
|
+
"is a large Transformer-based sequence to sequence model. It is trained on Samanantar "
|
|
32
|
+
"dataset (Ramesh et al., 2021), which is the largest parallel multi- lingual corpus "
|
|
33
|
+
"over eleven Indic languages."
|
|
34
|
+
),
|
|
34
35
|
reference="https://gem-benchmark.com/data_cards/opusparcus",
|
|
35
36
|
category="t2t",
|
|
36
37
|
modalities=["text"],
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from mteb.abstasks.pair_classification import AbsTaskPairClassification
|
|
2
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class SICKNLPairClassification(AbsTaskPairClassification):
|
|
6
|
+
metadata = TaskMetadata(
|
|
7
|
+
name="SICKNLPairClassification",
|
|
8
|
+
dataset={
|
|
9
|
+
"path": "clips/mteb-nl-sick-pcls-pr",
|
|
10
|
+
"revision": "a13a1892bcb4c077dc416d390389223eea5f20f0",
|
|
11
|
+
},
|
|
12
|
+
description="SICK-NL is a Dutch translation of SICK ",
|
|
13
|
+
reference="https://aclanthology.org/2021.eacl-main.126/",
|
|
14
|
+
type="PairClassification",
|
|
15
|
+
category="t2t",
|
|
16
|
+
modalities=["text"],
|
|
17
|
+
eval_splits=["test"],
|
|
18
|
+
eval_langs=["nld-Latn"],
|
|
19
|
+
main_score="max_ap",
|
|
20
|
+
date=("2020-09-01", "2021-01-01"),
|
|
21
|
+
domains=["Web", "Written"],
|
|
22
|
+
task_subtypes=[],
|
|
23
|
+
license="mit",
|
|
24
|
+
annotations_creators="human-annotated",
|
|
25
|
+
dialect=[],
|
|
26
|
+
sample_creation="machine-translated and verified",
|
|
27
|
+
bibtex_citation=r"""
|
|
28
|
+
@inproceedings{wijnholds2021sick,
|
|
29
|
+
author = {Wijnholds, Gijs and Moortgat, Michael},
|
|
30
|
+
booktitle = {Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume},
|
|
31
|
+
pages = {1474--1479},
|
|
32
|
+
title = {SICK-NL: A Dataset for Dutch Natural Language Inference},
|
|
33
|
+
year = {2021},
|
|
34
|
+
}
|
|
35
|
+
""",
|
|
36
|
+
)
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from mteb.abstasks.pair_classification import AbsTaskPairClassification
|
|
2
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class XLWICNLPairClassification(AbsTaskPairClassification):
|
|
6
|
+
metadata = TaskMetadata(
|
|
7
|
+
name="XLWICNLPairClassification",
|
|
8
|
+
description="The Word-in-Context dataset (WiC) addresses the dependence on sense inventories by reformulating "
|
|
9
|
+
"the standard disambiguation task as a binary classification problem; but, it is limited to the "
|
|
10
|
+
"English language. We put forward a large multilingual benchmark, XL-WiC, featuring gold standards "
|
|
11
|
+
"in 12 new languages from varied language families and with different degrees of resource "
|
|
12
|
+
"availability, opening room for evaluation scenarios such as zero-shot cross-lingual transfer. ",
|
|
13
|
+
reference="https://aclanthology.org/2020.emnlp-main.584.pdf",
|
|
14
|
+
dataset={
|
|
15
|
+
"path": "clips/mteb-nl-xlwic",
|
|
16
|
+
"revision": "0b33ce358b1b5d500ff3715ba3d777b4d2c21cb0",
|
|
17
|
+
},
|
|
18
|
+
type="PairClassification",
|
|
19
|
+
category="t2t",
|
|
20
|
+
modalities=["text"],
|
|
21
|
+
date=("2019-10-04", "2019-10-04"),
|
|
22
|
+
eval_splits=["test"],
|
|
23
|
+
eval_langs=["nld-Latn"],
|
|
24
|
+
main_score="max_ap",
|
|
25
|
+
domains=["Written"],
|
|
26
|
+
task_subtypes=[],
|
|
27
|
+
license="cc-by-nc-sa-4.0",
|
|
28
|
+
annotations_creators="derived",
|
|
29
|
+
dialect=[],
|
|
30
|
+
sample_creation="created",
|
|
31
|
+
bibtex_citation=r"""
|
|
32
|
+
@inproceedings{raganato2020xl,
|
|
33
|
+
author = {Raganato, A and Pasini, T and Camacho-Collados, J and Pilehvar, M and others},
|
|
34
|
+
booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
|
|
35
|
+
organization = {Association for Computational Linguistics (ACL)},
|
|
36
|
+
pages = {7193--7206},
|
|
37
|
+
title = {XL-WiC: A multilingual benchmark for evaluating semantic contextualization},
|
|
38
|
+
year = {2020},
|
|
39
|
+
}
|
|
40
|
+
""",
|
|
41
|
+
)
|
|
@@ -60,9 +60,9 @@ class CodeRAGProgrammingSolutionsRetrieval(AbsTaskRetrieval):
|
|
|
60
60
|
self.data_loaded = True
|
|
61
61
|
|
|
62
62
|
def dataset_transform(self) -> None:
|
|
63
|
-
"""And transform to a retrieval
|
|
63
|
+
"""And transform to a retrieval dataset, which have the following attributes
|
|
64
64
|
|
|
65
|
-
self.corpus = Dict[doc_id, Dict[str, str]] #id => dict with document
|
|
65
|
+
self.corpus = Dict[doc_id, Dict[str, str]] #id => dict with document data like title and text
|
|
66
66
|
self.queries = Dict[query_id, str] #id => query
|
|
67
67
|
self.relevant_docs = Dict[query_id, Dict[[doc_id, score]]
|
|
68
68
|
"""
|
|
@@ -117,9 +117,9 @@ class CodeRAGOnlineTutorialsRetrieval(AbsTaskRetrieval):
|
|
|
117
117
|
self.data_loaded = True
|
|
118
118
|
|
|
119
119
|
def dataset_transform(self) -> None:
|
|
120
|
-
"""And transform to a retrieval
|
|
120
|
+
"""And transform to a retrieval dataset, which have the following attributes
|
|
121
121
|
|
|
122
|
-
self.corpus = Dict[doc_id, Dict[str, str]] #id => dict with document
|
|
122
|
+
self.corpus = Dict[doc_id, Dict[str, str]] #id => dict with document data like title and text
|
|
123
123
|
self.queries = Dict[query_id, str] #id => query
|
|
124
124
|
self.relevant_docs = Dict[query_id, Dict[[doc_id, score]]
|
|
125
125
|
"""
|
|
@@ -177,9 +177,9 @@ class CodeRAGLibraryDocumentationSolutionsRetrieval(AbsTaskRetrieval):
|
|
|
177
177
|
self.data_loaded = True
|
|
178
178
|
|
|
179
179
|
def dataset_transform(self) -> None:
|
|
180
|
-
"""And transform to a retrieval
|
|
180
|
+
"""And transform to a retrieval dataset, which have the following attributes
|
|
181
181
|
|
|
182
|
-
self.corpus = Dict[doc_id, Dict[str, str]] #id => dict with document
|
|
182
|
+
self.corpus = Dict[doc_id, Dict[str, str]] #id => dict with document data like title and text
|
|
183
183
|
self.queries = Dict[query_id, str] #id => query
|
|
184
184
|
self.relevant_docs = Dict[query_id, Dict[[doc_id, score]]
|
|
185
185
|
"""
|
|
@@ -234,9 +234,9 @@ class CodeRAGStackoverflowPostsRetrieval(AbsTaskRetrieval):
|
|
|
234
234
|
self.data_loaded = True
|
|
235
235
|
|
|
236
236
|
def dataset_transform(self) -> None:
|
|
237
|
-
"""And transform to a retrieval
|
|
237
|
+
"""And transform to a retrieval dataset, which have the following attributes
|
|
238
238
|
|
|
239
|
-
self.corpus = Dict[doc_id, Dict[str, str]] #id => dict with document
|
|
239
|
+
self.corpus = Dict[doc_id, Dict[str, str]] #id => dict with document data like title and text
|
|
240
240
|
self.queries = Dict[query_id, str] #id => query
|
|
241
241
|
self.relevant_docs = Dict[query_id, Dict[[doc_id, score]]
|
|
242
242
|
"""
|
|
@@ -56,7 +56,7 @@ Derczynski, Leon},
|
|
|
56
56
|
self.data_loaded = True
|
|
57
57
|
|
|
58
58
|
def dataset_transform(self) -> None:
|
|
59
|
-
"""And transform to a retrieval
|
|
59
|
+
"""And transform to a retrieval dataset, which have the following attributes
|
|
60
60
|
|
|
61
61
|
self.corpus = dict[doc_id, dict[str, str]] #id => dict with document data like title and text
|
|
62
62
|
self.queries = dict[query_id, str] #id => query
|
|
@@ -69,9 +69,9 @@ Piperidis, Stelios},
|
|
|
69
69
|
self.data_loaded = True
|
|
70
70
|
|
|
71
71
|
def dataset_transform(self) -> None:
|
|
72
|
-
"""And transform to a retrieval
|
|
72
|
+
"""And transform to a retrieval dataset, which have the following attributes
|
|
73
73
|
|
|
74
|
-
self.corpus = dict[doc_id, dict[str, str]] #id => dict with document
|
|
74
|
+
self.corpus = dict[doc_id, dict[str, str]] #id => dict with document data like title and text
|
|
75
75
|
self.queries = dict[query_id, str] #id => query
|
|
76
76
|
self.relevant_docs = dict[query_id, dict[[doc_id, score]]
|
|
77
77
|
"""
|
|
@@ -45,9 +45,9 @@ class TwitterHjerneRetrieval(AbsTaskRetrieval):
|
|
|
45
45
|
self.data_loaded = True
|
|
46
46
|
|
|
47
47
|
def dataset_transform(self) -> None:
|
|
48
|
-
"""And transform to a retrieval
|
|
48
|
+
"""And transform to a retrieval dataset, which have the following attributes
|
|
49
49
|
|
|
50
|
-
self.corpus = dict[doc_id, dict[str, str]] #id => dict with document
|
|
50
|
+
self.corpus = dict[doc_id, dict[str, str]] #id => dict with document data like title and text
|
|
51
51
|
self.queries = dict[query_id, str] #id => query
|
|
52
52
|
self.relevant_docs = dict[query_id, dict[[doc_id, score]]
|
|
53
53
|
"""
|
|
@@ -22,6 +22,7 @@ from .cirr_it2i_retrieval import CIRRIT2IRetrieval
|
|
|
22
22
|
from .climate_fever_retrieval import (
|
|
23
23
|
ClimateFEVER,
|
|
24
24
|
ClimateFEVERHardNegatives,
|
|
25
|
+
ClimateFEVERHardNegativesV2,
|
|
25
26
|
ClimateFEVERRetrievalv2,
|
|
26
27
|
)
|
|
27
28
|
from .cqa_dupstack_android_retrieval import CQADupstackAndroidRetrieval
|
|
@@ -57,7 +58,7 @@ from .dapfam_patent_retrieval import (
|
|
|
57
58
|
DAPFAMOutTitlAbsToTitlAbsClmRetrieval,
|
|
58
59
|
DAPFAMOutTitlAbsToTitlAbsRetrieval,
|
|
59
60
|
)
|
|
60
|
-
from .dbpedia_retrieval import DBPedia, DBPediaHardNegatives
|
|
61
|
+
from .dbpedia_retrieval import DBPedia, DBPediaHardNegatives, DBPediaHardNegativesV2
|
|
61
62
|
from .edis_t2it_retrieval import EDIST2ITRetrieval
|
|
62
63
|
from .encyclopedia_vqa_it2it_retrieval import EncyclopediaVQAIT2ITRetrieval
|
|
63
64
|
from .english_finance1_retrieval import EnglishFinance1Retrieval
|
|
@@ -70,7 +71,7 @@ from .fashion200k_i2t_retrieval import Fashion200kI2TRetrieval
|
|
|
70
71
|
from .fashion200k_t2i_retrieval import Fashion200kT2IRetrieval
|
|
71
72
|
from .fashion_iq_it2i_retrieval import FashionIQIT2IRetrieval
|
|
72
73
|
from .feedback_qa_retrieval import FeedbackQARetrieval
|
|
73
|
-
from .fever_retrieval import FEVER, FEVERHardNegatives
|
|
74
|
+
from .fever_retrieval import FEVER, FEVERHardNegatives, FEVERHardNegativesV2
|
|
74
75
|
from .fi_qa2018_retrieval import FiQA2018
|
|
75
76
|
from .fin_qa_retrieval import FinQARetrieval
|
|
76
77
|
from .finance_bench_retrieval import FinanceBenchRetrieval
|
|
@@ -85,7 +86,11 @@ from .hateful_memes_i2t_retrieval import HatefulMemesI2TRetrieval
|
|
|
85
86
|
from .hateful_memes_t2i_retrieval import HatefulMemesT2IRetrieval
|
|
86
87
|
from .hc3_finance_retrieval import HC3FinanceRetrieval
|
|
87
88
|
from .hella_swag_retrieval import HellaSwag
|
|
88
|
-
from .hotpot_qa_retrieval import HotpotQA, HotpotQAHardNegatives
|
|
89
|
+
from .hotpot_qa_retrieval import (
|
|
90
|
+
HotpotQA,
|
|
91
|
+
HotpotQAHardNegatives,
|
|
92
|
+
HotpotQAHardNegativesV2,
|
|
93
|
+
)
|
|
89
94
|
from .image_co_de_t2i_retrieval import ImageCoDeT2IRetrieval
|
|
90
95
|
from .info_seek_it2it_retrieval import InfoSeekIT2ITRetrieval
|
|
91
96
|
from .info_seek_it2t_retrieval import InfoSeekIT2TRetrieval
|
|
@@ -133,7 +138,11 @@ from .oven_it2it_retrieval import OVENIT2ITRetrieval
|
|
|
133
138
|
from .oven_it2t_retrieval import OVENIT2TRetrieval
|
|
134
139
|
from .piqa_retrieval import PIQA
|
|
135
140
|
from .quail_retrieval import Quail
|
|
136
|
-
from .quora_retrieval import QuoraRetrieval, QuoraRetrievalHardNegatives
|
|
141
|
+
from .quora_retrieval import (
|
|
142
|
+
QuoraRetrieval,
|
|
143
|
+
QuoraRetrievalHardNegatives,
|
|
144
|
+
QuoraRetrievalHardNegativesV2,
|
|
145
|
+
)
|
|
137
146
|
from .r2_med_retrieval import (
|
|
138
147
|
R2MEDBioinformaticsRetrieval,
|
|
139
148
|
R2MEDBiologyRetrieval,
|
|
@@ -247,6 +256,7 @@ __all__ = [
|
|
|
247
256
|
"ChemNQRetrieval",
|
|
248
257
|
"ClimateFEVER",
|
|
249
258
|
"ClimateFEVERHardNegatives",
|
|
259
|
+
"ClimateFEVERHardNegativesV2",
|
|
250
260
|
"ClimateFEVERRetrievalv2",
|
|
251
261
|
"DAPFAMAllTitlAbsClmToFullTextRetrieval",
|
|
252
262
|
"DAPFAMAllTitlAbsClmToTitlAbsClmRetrieval",
|
|
@@ -268,6 +278,7 @@ __all__ = [
|
|
|
268
278
|
"DAPFAMOutTitlAbsToTitlAbsRetrieval",
|
|
269
279
|
"DBPedia",
|
|
270
280
|
"DBPediaHardNegatives",
|
|
281
|
+
"DBPediaHardNegativesV2",
|
|
271
282
|
"EDIST2ITRetrieval",
|
|
272
283
|
"EncyclopediaVQAIT2ITRetrieval",
|
|
273
284
|
"EnglishFinance1Retrieval",
|
|
@@ -276,6 +287,7 @@ __all__ = [
|
|
|
276
287
|
"EnglishFinance4Retrieval",
|
|
277
288
|
"EnglishHealthcare1Retrieval",
|
|
278
289
|
"FEVERHardNegatives",
|
|
290
|
+
"FEVERHardNegativesV2",
|
|
279
291
|
"FaithDialRetrieval",
|
|
280
292
|
"Fashion200kI2TRetrieval",
|
|
281
293
|
"Fashion200kT2IRetrieval",
|
|
@@ -296,6 +308,7 @@ __all__ = [
|
|
|
296
308
|
"HellaSwag",
|
|
297
309
|
"HotpotQA",
|
|
298
310
|
"HotpotQAHardNegatives",
|
|
311
|
+
"HotpotQAHardNegativesV2",
|
|
299
312
|
"ImageCoDeT2IRetrieval",
|
|
300
313
|
"InfoSeekIT2ITRetrieval",
|
|
301
314
|
"InfoSeekIT2TRetrieval",
|
|
@@ -345,6 +358,7 @@ __all__ = [
|
|
|
345
358
|
"Quail",
|
|
346
359
|
"QuoraRetrieval",
|
|
347
360
|
"QuoraRetrievalHardNegatives",
|
|
361
|
+
"QuoraRetrievalHardNegativesV2",
|
|
348
362
|
"R2MEDBioinformaticsRetrieval",
|
|
349
363
|
"R2MEDBiologyRetrieval",
|
|
350
364
|
"R2MEDIIYiClinicalRetrieval",
|