mteb 2.0.4__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/__init__.py +10 -1
- mteb/benchmarks/benchmarks/__init__.py +2 -0
- mteb/benchmarks/benchmarks/benchmarks.py +75 -0
- mteb/descriptive_stats/BitextMining/BUCC.json +70 -40
- mteb/descriptive_stats/Classification/DKHateClassification.json +40 -24
- mteb/descriptive_stats/Classification/DutchColaClassification.json +54 -0
- mteb/descriptive_stats/Classification/DutchGovernmentBiasClassification.json +54 -0
- mteb/descriptive_stats/Classification/DutchNewsArticlesClassification.json +90 -0
- mteb/descriptive_stats/Classification/DutchSarcasticHeadlinesClassification.json +54 -0
- mteb/descriptive_stats/Classification/FinancialPhrasebankClassification.json +23 -15
- mteb/descriptive_stats/Classification/IconclassClassification.json +96 -0
- mteb/descriptive_stats/Classification/ImdbClassification.json +40 -24
- mteb/descriptive_stats/Classification/KorHateClassification.json +23 -15
- mteb/descriptive_stats/Classification/OpenTenderClassification.json +222 -0
- mteb/descriptive_stats/Classification/VaccinChatNLClassification.json +1068 -0
- mteb/descriptive_stats/Clustering/ArxivClusteringP2P.json +555 -550
- mteb/descriptive_stats/Clustering/ArxivClusteringP2P.v2.json +546 -541
- mteb/descriptive_stats/Clustering/ArxivClusteringS2S.json +555 -550
- mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringP2P.json +45 -0
- mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringS2S.json +45 -0
- mteb/descriptive_stats/Clustering/IconclassClusteringS2S.json +48 -0
- mteb/descriptive_stats/Clustering/MLSUMClusteringP2P.json +2466 -2416
- mteb/descriptive_stats/Clustering/OpenTenderClusteringP2P.json +111 -0
- mteb/descriptive_stats/Clustering/OpenTenderClusteringS2S.json +111 -0
- mteb/descriptive_stats/Clustering/RedditClusteringP2P.json +1365 -1360
- mteb/descriptive_stats/Clustering/SNLClustering.json +378 -373
- mteb/descriptive_stats/Clustering/SwednClustering.json +28 -23
- mteb/descriptive_stats/Clustering/VABBClusteringP2P.json +60 -0
- mteb/descriptive_stats/Clustering/VABBClusteringS2S.json +60 -0
- mteb/descriptive_stats/Clustering/VGClustering.json +54 -49
- mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/WITT2IRetrieval.json +324 -204
- mteb/descriptive_stats/Image/Any2AnyRetrieval/MemotionI2TRetrieval.json +28 -18
- mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRAirbnbSyntheticRetrieval.json +334 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRGitHubReadmeRetrieval.json +544 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRTweetStockSyntheticsRetrieval.json +334 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRWikimediaCommonsDocumentsRetrieval.json +634 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore2ESGReportsRetrieval.json +154 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore2EconomicsReportsRetrieval.json +154 -0
- mteb/descriptive_stats/Image/ImageClassification/Imagenet1k.json +6039 -3007
- mteb/descriptive_stats/Image/ZeroShotClassification/Imagenet1kZeroShot.json +3024 -3010
- mteb/descriptive_stats/Image/ZeroShotClassification/PatchCamelyonZeroShot.json +30 -16
- mteb/descriptive_stats/MultilabelClassification/CovidDisinformationNLMultiLabelClassification.json +84 -0
- mteb/descriptive_stats/MultilabelClassification/VABBMultiLabelClassification.json +156 -0
- mteb/descriptive_stats/PairClassification/SICKNLPairClassification.json +35 -0
- mteb/descriptive_stats/PairClassification/XLWICNLPairClassification.json +35 -0
- mteb/descriptive_stats/Reranking/MIRACLReranking.json +555 -479
- mteb/descriptive_stats/Reranking/MindSmallReranking.json +29 -25
- mteb/descriptive_stats/Retrieval/AlloprofRetrieval.json +25 -26
- mteb/descriptive_stats/Retrieval/Code1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/DanFEVER.json +25 -26
- mteb/descriptive_stats/Retrieval/DutchNewsArticlesRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EnglishFinance1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EnglishFinance2Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EnglishFinance3Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EnglishFinance4Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EnglishHealthcare1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/French1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/FrenchLegal1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/German1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/GermanHealthcare1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/GermanLegal1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/JapaneseCode1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/JapaneseLegal1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/LegalQANLRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/MIRACLRetrieval.json +475 -494
- mteb/descriptive_stats/Retrieval/MSMARCO-Fa.json +25 -26
- mteb/descriptive_stats/Retrieval/MSMARCO.json +25 -84
- mteb/descriptive_stats/Retrieval/OpenTenderRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/Touche2020.json +25 -26
- mteb/descriptive_stats/Retrieval/VABBRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/bBSARDNLRetrieval.json +30 -0
- mteb/descriptive_stats/STS/SICK-NL-STS.json +28 -0
- mteb/descriptive_stats/Summarization/SummEval.json +27 -50
- mteb/descriptive_stats/Summarization/SummEvalFr.json +27 -50
- mteb/models/model_implementations/kalm_models.py +29 -0
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
- mteb/tasks/classification/eng/financial_phrasebank_classification.py +0 -3
- mteb/tasks/classification/kor/kor_hate_classification.py +0 -12
- mteb/tasks/classification/nld/__init__.py +16 -0
- mteb/tasks/classification/nld/dutch_cola_classification.py +38 -0
- mteb/tasks/classification/nld/dutch_government_bias_classification.py +37 -0
- mteb/tasks/classification/nld/dutch_news_articles_classification.py +30 -0
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +36 -0
- mteb/tasks/classification/nld/iconclass_classification.py +41 -0
- mteb/tasks/classification/nld/open_tender_classification.py +38 -0
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +46 -0
- mteb/tasks/clustering/__init__.py +1 -0
- mteb/tasks/clustering/nld/__init__.py +17 -0
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +37 -0
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +37 -0
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +47 -0
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +51 -0
- mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +41 -0
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +51 -0
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +51 -0
- mteb/tasks/clustering/swe/swedn_clustering.py +2 -2
- mteb/tasks/multilabel_classification/__init__.py +1 -0
- mteb/tasks/multilabel_classification/nld/__init__.py +9 -0
- mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +88 -0
- mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +44 -0
- mteb/tasks/pair_classification/__init__.py +1 -0
- mteb/tasks/pair_classification/nld/__init__.py +7 -0
- mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +36 -0
- mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +41 -0
- mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/__init__.py +10 -0
- mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +41 -0
- mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +30 -0
- mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +39 -0
- mteb/tasks/retrieval/nld/open_tender_retrieval.py +38 -0
- mteb/tasks/retrieval/nld/vabb_retrieval.py +41 -0
- mteb/tasks/sts/__init__.py +1 -0
- mteb/tasks/sts/nld/__init__.py +5 -0
- mteb/tasks/sts/nld/sick_nl_sts.py +41 -0
- {mteb-2.0.4.dist-info → mteb-2.1.0.dist-info}/METADATA +2 -204
- {mteb-2.0.4.dist-info → mteb-2.1.0.dist-info}/RECORD +120 -49
- {mteb-2.0.4.dist-info → mteb-2.1.0.dist-info}/WHEEL +0 -0
- {mteb-2.0.4.dist-info → mteb-2.1.0.dist-info}/entry_points.txt +0 -0
- {mteb-2.0.4.dist-info → mteb-2.1.0.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.0.4.dist-info → mteb-2.1.0.dist-info}/top_level.txt +0 -0
mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
from mteb.abstasks.multilabel_classification import (
|
|
2
|
+
AbsTaskMultilabelClassification,
|
|
3
|
+
)
|
|
4
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class CovidDisinformationNLMultiLabelClassification(AbsTaskMultilabelClassification):
    """Dutch multi-label classification task on the COVID-19 disinformation data.

    The ``metadata`` attribute declares where the data comes from and how the
    task is scored. ``dataset_transform`` restricts every split to the rows
    whose ``q1_label`` equals ``"yes"`` and derives the multi-label ``label``
    column from the ``q2_label`` .. ``q7_label`` columns.
    """

    metadata = TaskMetadata(
        name="CovidDisinformationNLMultiLabelClassification",
        dataset={
            "path": "clips/mteb-nl-COVID-19-disinformation",
            "revision": "7ad922bdef875db1f530847c6ffff05fc154f2e8",
        },
        description="The dataset is curated to address questions of interest to journalists, fact-checkers, "
        "social media platforms, policymakers, and the general public.",
        reference="https://aclanthology.org/2021.findings-emnlp.56.pdf",
        type="MultilabelClassification",
        category="t2c",
        modalities=["text"],
        eval_splits=["test"],
        eval_langs=["nld-Latn"],
        main_score="f1",
        date=("2020-01-01", "2021-04-01"),
        domains=["Web", "Social", "Written"],
        task_subtypes=[],
        license="cc-by-4.0",
        annotations_creators="human-annotated",
        dialect=[],
        sample_creation="found",
        bibtex_citation=r"""
@inproceedings{alam-etal-2021-fighting-covid,
address = {Punta Cana, Dominican Republic},
author = {Alam, Firoj and
Shaar, Shaden and
Dalvi, Fahim and
Sajjad, Hassan and
Nikolov, Alex and
Mubarak, Hamdy and
Da San Martino, Giovanni and
Abdelali, Ahmed and
Durrani, Nadir and
Darwish, Kareem and
Al-Homaid, Abdulaziz and
Zaghouani, Wajdi and
Caselli, Tommaso and
Danoe, Gijs and
Stolk, Friso and
Bruntink, Britt and
Nakov, Preslav},
booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2021},
doi = {10.18653/v1/2021.findings-emnlp.56},
editor = {Moens, Marie-Francine and
Huang, Xuanjing and
Specia, Lucia and
Yih, Scott Wen-tau},
month = nov,
pages = {611--649},
publisher = {Association for Computational Linguistics},
title = {Fighting the {COVID}-19 Infodemic: Modeling the Perspective of Journalists, Fact-Checkers, Social Media Platforms, Policy Makers, and the Society},
url = {https://aclanthology.org/2021.findings-emnlp.56/},
year = {2021},
}
""",
    )

    def dataset_transform(self) -> None:
        """Filter each split on ``q1_label`` and attach the ``label`` column.

        Every split in ``self.dataset`` is passed through ``filter`` with the
        predicate ``q1_label == "yes"`` and then through ``map`` with
        ``map_labels``; the result is stored back under the same split key.
        """
        # Position in this tuple is the integer class id written to "label".
        label_columns = (
            "q2_label",
            "q3_label",
            "q4_label",
            "q5_label",
            "q6_label",
            "q7_label",
        )

        def map_labels(example):
            """Return ``{"label": [...]}`` with the ids of the columns set to "yes"."""
            return {
                "label": [
                    index
                    for index, column in enumerate(label_columns)
                    if example[column] == "yes"
                ]
            }

        for split in self.dataset:
            # NOTE(review): assumes each split exposes chainable filter()/map()
            # (e.g. a Hugging Face ``datasets.Dataset``) — confirm.
            self.dataset[split] = (
                self.dataset[split]
                .filter(lambda ex: ex["q1_label"] == "yes")
                .map(map_labels)
            )
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
from mteb.abstasks.multilabel_classification import (
|
|
2
|
+
AbsTaskMultilabelClassification,
|
|
3
|
+
)
|
|
4
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class VABBMultiLabelClassification(AbsTaskMultilabelClassification):
    """Dutch multi-label classification task built on the VABB-SHW bibliography data.

    The class is purely declarative: it sets ``samples_per_label`` and the
    ``metadata`` record describing the data source, scoring and provenance.
    """

    # NOTE(review): presumably the number of training samples drawn per label
    # by the base class — confirm against AbsTaskMultilabelClassification.
    samples_per_label = 128

    metadata = TaskMetadata(
        # -- identity -------------------------------------------------------
        name="VABBMultiLabelClassification",
        description=(
            "This dataset contains the fourteenth edition of the Flemish Academic "
            "Bibliography for the Social Sciences and Humanities (VABB-SHW), "
            "a database of academic publications from the social sciences and "
            "humanities authored by researchers affiliated to Flemish universities "
            "(more information). "
            "Publications in the database are used as one of the parameters of "
            "the Flemish performance-based research funding system"
        ),
        reference="https://zenodo.org/records/14214806",
        # -- data source ----------------------------------------------------
        dataset={
            "path": "clips/mteb-nl-vabb-mlcls-pr",
            "revision": "584c70f5104671772119f21e9f8a3c912ac07d4a",
        },
        # -- task definition ------------------------------------------------
        type="MultilabelClassification",
        category="t2c",
        modalities=["text"],
        domains=["Academic", "Written"],
        task_subtypes=[],
        # -- evaluation -----------------------------------------------------
        eval_splits=["test"],
        eval_langs=["nld-Latn"],
        main_score="f1",
        # -- provenance -----------------------------------------------------
        # NOTE(review): confirm this range against the 2024 edition cited below.
        date=("2020-01-01", "2021-04-01"),
        license="cc-by-4.0",
        annotations_creators="human-annotated",
        dialect=[],
        sample_creation="found",
        bibtex_citation=r"""
@dataset{aspeslagh2024vabb,
author = {Aspeslagh, Pieter and Guns, Raf and Engels, Tim C. E.},
doi = {10.5281/zenodo.14214806},
publisher = {Zenodo},
title = {VABB-SHW: Dataset of Flemish Academic Bibliography for the Social Sciences and Humanities (edition 14)},
url = {https://doi.org/10.5281/zenodo.14214806},
year = {2024},
}
""",
    )
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from mteb.abstasks.pair_classification import AbsTaskPairClassification
|
|
2
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class SICKNLPairClassification(AbsTaskPairClassification):
    """Dutch pair-classification task declared over the SICK-NL data.

    Only ``metadata`` is defined here; loading and evaluation come from the
    base class.
    """

    metadata = TaskMetadata(
        # -- identity -------------------------------------------------------
        name="SICKNLPairClassification",
        description="SICK-NL is a Dutch translation of SICK ",
        reference="https://aclanthology.org/2021.eacl-main.126/",
        # -- data source ----------------------------------------------------
        dataset={
            "path": "clips/mteb-nl-sick-pcls-pr",
            "revision": "a13a1892bcb4c077dc416d390389223eea5f20f0",
        },
        # -- task definition ------------------------------------------------
        type="PairClassification",
        category="t2t",
        modalities=["text"],
        domains=["Web", "Written"],
        task_subtypes=[],
        # -- evaluation -----------------------------------------------------
        eval_splits=["test"],
        eval_langs=["nld-Latn"],
        main_score="max_ap",
        # -- provenance -----------------------------------------------------
        date=("2020-09-01", "2021-01-01"),
        license="mit",
        annotations_creators="human-annotated",
        dialect=[],
        sample_creation="machine-translated and verified",
        bibtex_citation=r"""
@inproceedings{wijnholds2021sick,
author = {Wijnholds, Gijs and Moortgat, Michael},
booktitle = {Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume},
pages = {1474--1479},
title = {SICK-NL: A Dataset for Dutch Natural Language Inference},
year = {2021},
}
""",
    )
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from mteb.abstasks.pair_classification import AbsTaskPairClassification
|
|
2
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class XLWICNLPairClassification(AbsTaskPairClassification):
    """Dutch pair-classification task declared over the XL-WiC data.

    Only ``metadata`` is defined here; loading and evaluation come from the
    base class.
    """

    metadata = TaskMetadata(
        # -- identity -------------------------------------------------------
        name="XLWICNLPairClassification",
        description=(
            "The Word-in-Context dataset (WiC) addresses the dependence on sense "
            "inventories by reformulating the standard disambiguation task as a "
            "binary classification problem; but, it is limited to the English language. "
            "We put forward a large multilingual benchmark, XL-WiC, featuring gold "
            "standards in 12 new languages from varied language families and with "
            "different degrees of resource availability, opening room for evaluation "
            "scenarios such as zero-shot cross-lingual transfer. "
        ),
        reference="https://aclanthology.org/2020.emnlp-main.584.pdf",
        # -- data source ----------------------------------------------------
        dataset={
            "path": "clips/mteb-nl-xlwic",
            "revision": "0b33ce358b1b5d500ff3715ba3d777b4d2c21cb0",
        },
        # -- task definition ------------------------------------------------
        type="PairClassification",
        category="t2t",
        modalities=["text"],
        domains=["Written"],
        task_subtypes=[],
        # -- evaluation -----------------------------------------------------
        eval_splits=["test"],
        eval_langs=["nld-Latn"],
        main_score="max_ap",
        # -- provenance -----------------------------------------------------
        date=("2019-10-04", "2019-10-04"),
        license="cc-by-nc-sa-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="created",
        bibtex_citation=r"""
@inproceedings{raganato2020xl,
author = {Raganato, A and Pasini, T and Camacho-Collados, J and Pilehvar, M and others},
booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
organization = {Association for Computational Linguistics (ACL)},
pages = {7193--7206},
title = {XL-WiC: A multilingual benchmark for evaluating semantic contextualization},
year = {2020},
}
""",
    )
|
|
@@ -101,7 +101,7 @@ class VDRMultilingualRetrieval(AbsTaskRetrieval):
|
|
|
101
101
|
"revision": "9e26ae152f5950ab1a5ff1c58edade3acc894793",
|
|
102
102
|
},
|
|
103
103
|
type="Retrieval",
|
|
104
|
-
category="
|
|
104
|
+
category="t2i",
|
|
105
105
|
modalities=["text", "image"],
|
|
106
106
|
eval_splits=[_EVAL_SPLIT],
|
|
107
107
|
eval_langs=_LANGS,
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from .argu_ana_nl_retrieval import ArguAnaNL
|
|
2
|
+
from .bbsard_nl_retrieval import BBSARDNLRetrieval
|
|
2
3
|
from .climate_fevernl_retrieval import ClimateFEVERNL
|
|
3
4
|
from .cqa_dupstack_android_nl_retrieval import CQADupstackAndroidNLRetrieval
|
|
4
5
|
from .cqa_dupstack_english_nl_retrieval import CQADupstackEnglishNLRetrieval
|
|
@@ -13,17 +14,21 @@ from .cqa_dupstack_unix_nl_retrieval import CQADupstackUnixNLRetrieval
|
|
|
13
14
|
from .cqa_dupstack_webmasters_nl_retrieval import CQADupstackWebmastersNLRetrieval
|
|
14
15
|
from .cqa_dupstack_wordpress_nl_retrieval import CQADupstackWordpressNLRetrieval
|
|
15
16
|
from .db_pedia_nl_retrieval import DBPediaNL
|
|
17
|
+
from .dutch_news_articles_retrieval import DutchNewsArticlesRetrieval
|
|
16
18
|
from .fevernl_retrieval import FEVERNL
|
|
17
19
|
from .fi_qa2018_nl_retrieval import FiQA2018NL
|
|
18
20
|
from .hotpot_qanl_retrieval import HotpotQANL
|
|
21
|
+
from .legal_qa_nl_retrieval import LegalQANLRetrieval
|
|
19
22
|
from .mmarconl_retrieval import MMMARCONL
|
|
20
23
|
from .nf_corpus_nl_retrieval import NFCorpusNL
|
|
21
24
|
from .nqnl_retrieval import NQNL
|
|
25
|
+
from .open_tender_retrieval import OpenTenderRetrieval
|
|
22
26
|
from .quora_nl_retrieval import QuoraNLRetrieval
|
|
23
27
|
from .sci_fact_nl_retrieval import SciFactNL
|
|
24
28
|
from .scidocsnl_retrieval import SCIDOCSNL
|
|
25
29
|
from .touche2020_nl_retrieval import Touche2020NL
|
|
26
30
|
from .treccovidnl_retrieval import TRECCOVIDNL
|
|
31
|
+
from .vabb_retrieval import VABBRetrieval
|
|
27
32
|
|
|
28
33
|
__all__ = [
|
|
29
34
|
"FEVERNL",
|
|
@@ -32,6 +37,7 @@ __all__ = [
|
|
|
32
37
|
"SCIDOCSNL",
|
|
33
38
|
"TRECCOVIDNL",
|
|
34
39
|
"ArguAnaNL",
|
|
40
|
+
"BBSARDNLRetrieval",
|
|
35
41
|
"CQADupstackAndroidNLRetrieval",
|
|
36
42
|
"CQADupstackEnglishNLRetrieval",
|
|
37
43
|
"CQADupstackGamingNLRetrieval",
|
|
@@ -46,10 +52,14 @@ __all__ = [
|
|
|
46
52
|
"CQADupstackWordpressNLRetrieval",
|
|
47
53
|
"ClimateFEVERNL",
|
|
48
54
|
"DBPediaNL",
|
|
55
|
+
"DutchNewsArticlesRetrieval",
|
|
49
56
|
"FiQA2018NL",
|
|
50
57
|
"HotpotQANL",
|
|
58
|
+
"LegalQANLRetrieval",
|
|
51
59
|
"NFCorpusNL",
|
|
60
|
+
"OpenTenderRetrieval",
|
|
52
61
|
"QuoraNLRetrieval",
|
|
53
62
|
"SciFactNL",
|
|
54
63
|
"Touche2020NL",
|
|
64
|
+
"VABBRetrieval",
|
|
55
65
|
]
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
2
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class BBSARDNLRetrieval(AbsTaskRetrieval):
    """Dutch legal retrieval task declared over the bBSARD data.

    The class sets ``ignore_identical_ids`` and the ``metadata`` record; all
    loading and evaluation logic comes from the base class.
    """

    # NOTE(review): presumably tells the evaluator to skip hits whose document
    # id equals the query id — confirm against AbsTaskRetrieval.
    ignore_identical_ids = True

    metadata = TaskMetadata(
        # -- identity -------------------------------------------------------
        # The registered task name starts with a lower-case "b", unlike the
        # class name.
        name="bBSARDNLRetrieval",
        description=(
            "Building on the Belgian Statutory Article Retrieval Dataset (BSARD) in "
            "French, we introduce the bilingual version of this dataset, bBSARD. "
            "The dataset contains parallel Belgian statutory articles in both French "
            "and Dutch, along with legal questions from BSARD and their Dutch translation."
        ),
        reference="https://aclanthology.org/2025.regnlp-1.3.pdf",
        # -- data source ----------------------------------------------------
        dataset={
            "path": "clips/mteb-nl-bbsard",
            "revision": "52027c212ba9765a3e9737c9cbf9a06ae83cbb93",
        },
        # -- task definition ------------------------------------------------
        type="Retrieval",
        category="t2t",
        modalities=["text"],
        domains=["Legal", "Written"],
        task_subtypes=[],
        # -- evaluation -----------------------------------------------------
        eval_splits=["test"],
        eval_langs=["nld-Latn"],
        main_score="ndcg_at_10",
        # -- provenance -----------------------------------------------------
        date=("2021-05-01", "2021-08-26"),
        license="cc-by-nc-sa-4.0",
        annotations_creators="expert-annotated",
        dialect=[],
        sample_creation="found",
        bibtex_citation=r"""
@article{lotfi2025bilingual,
author = {Lotfi, Ehsan and Banar, Nikolay and Yuzbashyan, Nerses and Daelemans, Walter},
journal = {COLING 2025},
pages = {10},
title = {Bilingual BSARD: Extending Statutory Article Retrieval to Dutch},
year = {2025},
}
""",
    )
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
2
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class DutchNewsArticlesRetrieval(AbsTaskRetrieval):
    """Dutch news retrieval task declared over the NOS articles data.

    Only ``metadata`` is defined here; loading and evaluation come from the
    base class.
    """

    metadata = TaskMetadata(
        # -- identity -------------------------------------------------------
        name="DutchNewsArticlesRetrieval",
        description=(
            "This dataset contains all the articles published by the NOS as of the "
            "1st of January 2010. "
            "The data is obtained by scraping the NOS website. "
            "The NOS is one of the biggest (online) news organizations in the Netherlands."
        ),
        reference="https://www.kaggle.com/datasets/maxscheijen/dutch-news-articles",
        # -- data source ----------------------------------------------------
        dataset={
            "path": "clips/mteb-nl-news-articles-ret",
            "revision": "c8042a86f3eb0d1fcec79a4a44ebf1eafe635462",
        },
        # -- task definition ------------------------------------------------
        type="Retrieval",
        category="t2t",
        modalities=["text"],
        domains=["Written", "News"],
        task_subtypes=["Article retrieval"],
        # -- evaluation -----------------------------------------------------
        eval_splits=["test"],
        eval_langs=["nld-Latn"],
        main_score="ndcg_at_10",
        # -- provenance -----------------------------------------------------
        date=("2009-11-01", "2010-01-01"),
        license="cc-by-nc-sa-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="found",
        # No citation is recorded for this dataset.
        bibtex_citation="",
    )
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
2
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class LegalQANLRetrieval(AbsTaskRetrieval):
    """Dutch legal question-answering retrieval task.

    The class sets ``ignore_identical_ids`` and the ``metadata`` record; all
    loading and evaluation logic comes from the base class.
    """

    # NOTE(review): presumably tells the evaluator to skip hits whose document
    # id equals the query id — confirm against AbsTaskRetrieval.
    ignore_identical_ids = True

    metadata = TaskMetadata(
        # -- identity -------------------------------------------------------
        name="LegalQANLRetrieval",
        description=(
            "To this end, we create and publish a Dutch legal QA dataset, "
            "consisting of question-answer pairs with attributions to Dutch law articles."
        ),
        reference="https://aclanthology.org/2024.nllp-1.12/",
        # -- data source ----------------------------------------------------
        dataset={
            "path": "clips/mteb-nl-legalqa-pr",
            "revision": "8f593522dfbe7ec07055ca9d38a700e7643d3882",
        },
        # -- task definition ------------------------------------------------
        type="Retrieval",
        category="t2t",
        modalities=["text"],
        domains=["Legal", "Written"],
        task_subtypes=[],
        # -- evaluation -----------------------------------------------------
        eval_splits=["test"],
        eval_langs=["nld-Latn"],
        main_score="ndcg_at_10",
        # -- provenance -----------------------------------------------------
        date=("2021-05-01", "2021-08-26"),
        license="cc-by-nc-sa-4.0",
        annotations_creators="expert-annotated",
        dialect=[],
        sample_creation="found",
        bibtex_citation=r"""
@inproceedings{redelaar2024attributed,
author = {Redelaar, Felicia and Van Drie, Romy and Verberne, Suzan and De Boer, Maaike},
booktitle = {Proceedings of the natural legal language processing workshop 2024},
pages = {154--165},
title = {Attributed Question Answering for Preconditions in the Dutch Law},
year = {2024},
}
""",
    )
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
2
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class OpenTenderRetrieval(AbsTaskRetrieval):
    """Dutch retrieval task declared over the OpenTender tender-call data.

    Only ``metadata`` is defined here; loading and evaluation come from the
    base class.
    """

    metadata = TaskMetadata(
        # -- identity -------------------------------------------------------
        name="OpenTenderRetrieval",
        description="This dataset contains Belgian and Dutch tender calls from OpenTender in Dutch",
        reference="https://arxiv.org/abs/2509.12340",
        # -- data source ----------------------------------------------------
        dataset={
            "path": "clips/mteb-nl-opentender-ret",
            "revision": "83eec1aa9c58f1dc8acfac015f653a9c25bda3f4",
        },
        # -- task definition ------------------------------------------------
        type="Retrieval",
        category="t2t",
        modalities=["text"],
        domains=["Government", "Written"],
        task_subtypes=["Article retrieval"],
        # -- evaluation -----------------------------------------------------
        eval_splits=["test"],
        eval_langs=["nld-Latn"],
        main_score="ndcg_at_10",
        # -- provenance -----------------------------------------------------
        date=("2009-11-01", "2010-01-01"),
        license="cc-by-nc-sa-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="found",
        bibtex_citation=r"""
@misc{banar2025mtebnle5nlembeddingbenchmark,
archiveprefix = {arXiv},
author = {Nikolay Banar and Ehsan Lotfi and Jens Van Nooten and Cristina Arhiliuc and Marija Kliocaite and Walter Daelemans},
eprint = {2509.12340},
primaryclass = {cs.CL},
title = {MTEB-NL and E5-NL: Embedding Benchmark and Models for Dutch},
url = {https://arxiv.org/abs/2509.12340},
year = {2025},
}
""",
    )
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
2
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class VABBRetrieval(AbsTaskRetrieval):
    """Dutch retrieval task declared over the VABB-SHW bibliography data.

    Only ``metadata`` is defined here; loading and evaluation come from the
    base class.
    """

    metadata = TaskMetadata(
        # -- identity -------------------------------------------------------
        name="VABBRetrieval",
        description=(
            "This dataset contains the fourteenth edition of the Flemish Academic "
            "Bibliography for the Social Sciences and Humanities (VABB-SHW), "
            "a database of academic publications from the social sciences and "
            "humanities authored by researchers affiliated to Flemish universities "
            "(more information). "
            "Publications in the database are used as one of the parameters of "
            "the Flemish performance-based research funding system"
        ),
        reference="https://zenodo.org/records/14214806",
        # -- data source ----------------------------------------------------
        dataset={
            "path": "clips/mteb-nl-vabb-ret",
            "revision": "af4a1e5b3ed451103894f86ff6b3ce85085d7b48",
        },
        # -- task definition ------------------------------------------------
        type="Retrieval",
        category="t2t",
        modalities=["text"],
        domains=["Academic", "Written"],
        task_subtypes=["Article retrieval"],
        # -- evaluation -----------------------------------------------------
        eval_splits=["test"],
        eval_langs=["nld-Latn"],
        main_score="ndcg_at_10",
        # -- provenance -----------------------------------------------------
        # NOTE(review): confirm this range against the 2024 edition cited below.
        date=("2009-11-01", "2010-01-01"),
        license="cc-by-nc-sa-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="found",
        bibtex_citation=r"""
@dataset{aspeslagh2024vabb,
author = {Aspeslagh, Pieter and Guns, Raf and Engels, Tim C. E.},
doi = {10.5281/zenodo.14214806},
publisher = {Zenodo},
title = {VABB-SHW: Dataset of Flemish Academic Bibliography for the Social Sciences and Humanities (edition 14)},
url = {https://doi.org/10.5281/zenodo.14214806},
year = {2024},
}
""",
    )
|
mteb/tasks/sts/__init__.py
CHANGED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from mteb.abstasks import AbsTaskSTS
|
|
2
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class SICKNLSTS(AbsTaskSTS):
    """Dutch semantic-textual-similarity task declared over the SICK-NL data.

    Besides ``metadata`` the class sets ``fast_loading`` and the
    ``min_score``/``max_score`` bounds; loading and evaluation come from the
    base class.
    """

    # NOTE(review): presumably switches the base class to its quicker dataset
    # loading path — confirm against AbsTaskSTS.
    fast_loading = True

    # Bounds of the similarity scores for this task.
    # NOTE(review): presumably used by the base class to normalise gold
    # scores — confirm.
    min_score = 0
    max_score = 5

    metadata = TaskMetadata(
        # -- identity -------------------------------------------------------
        name="SICK-NL-STS",
        description=(
            "SICK-NL (read: signal), a dataset targeting Natural Language Inference in Dutch. "
            "SICK-NL is obtained by translating the SICK dataset of (Marelli et al., 2014) "
            "from English into Dutch."
        ),
        reference="https://aclanthology.org/2021.eacl-main.126/",
        # -- data source ----------------------------------------------------
        dataset={
            "path": "clips/mteb-nl-sick-sts-pr",
            "revision": "7f88f003fc4e37ed8cd9ade84e390d871b032fef",
        },
        # -- task definition ------------------------------------------------
        type="STS",
        category="t2t",
        modalities=["text"],
        domains=["News", "Social", "Web", "Spoken", "Written"],
        task_subtypes=[],
        # -- evaluation -----------------------------------------------------
        eval_splits=["test"],
        eval_langs=["nld-Latn"],
        main_score="cosine_spearman",
        # -- provenance -----------------------------------------------------
        date=("2012-01-01", "2017-12-31"),
        license="mit",
        annotations_creators="human-annotated",
        dialect=[],
        sample_creation="machine-translated",
        bibtex_citation=r"""
@inproceedings{wijnholds2021sick,
author = {Wijnholds, Gijs and Moortgat, Michael},
booktitle = {Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume},
pages = {1474--1479},
title = {SICK-NL: A Dataset for Dutch Natural Language Inference},
year = {2021},
}
""",
    )
|