mteb 2.1.6__py3-none-any.whl → 2.1.7__py3-none-any.whl

This diff shows the changes between two publicly available versions of this package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (40)
  1. mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json +30 -0
  2. mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json +30 -0
  3. mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json +30 -0
  4. mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json +30 -0
  5. mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py +5 -1
  6. mteb/tasks/classification/nld/dutch_cola_classification.py +3 -0
  7. mteb/tasks/classification/nld/dutch_government_bias_classification.py +3 -0
  8. mteb/tasks/classification/nld/dutch_news_articles_classification.py +3 -0
  9. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +3 -0
  10. mteb/tasks/classification/nld/iconclass_classification.py +3 -0
  11. mteb/tasks/classification/nld/open_tender_classification.py +3 -0
  12. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +3 -0
  13. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +3 -0
  14. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +3 -0
  15. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +3 -0
  16. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +3 -0
  17. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +3 -0
  18. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +3 -0
  19. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +3 -0
  20. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +3 -0
  21. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +3 -0
  22. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +3 -0
  23. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +3 -0
  24. mteb/tasks/retrieval/nld/__init__.py +8 -4
  25. mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py +46 -27
  26. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +3 -0
  27. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +3 -0
  28. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +3 -0
  29. mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py +42 -25
  30. mteb/tasks/retrieval/nld/open_tender_retrieval.py +3 -0
  31. mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py +42 -24
  32. mteb/tasks/retrieval/nld/scidocsnl_retrieval.py +44 -27
  33. mteb/tasks/retrieval/nld/vabb_retrieval.py +3 -0
  34. mteb/tasks/sts/nld/sick_nl_sts.py +1 -0
  35. {mteb-2.1.6.dist-info → mteb-2.1.7.dist-info}/METADATA +1 -1
  36. {mteb-2.1.6.dist-info → mteb-2.1.7.dist-info}/RECORD +40 -36
  37. {mteb-2.1.6.dist-info → mteb-2.1.7.dist-info}/WHEEL +0 -0
  38. {mteb-2.1.6.dist-info → mteb-2.1.7.dist-info}/entry_points.txt +0 -0
  39. {mteb-2.1.6.dist-info → mteb-2.1.7.dist-info}/licenses/LICENSE +0 -0
  40. {mteb-2.1.6.dist-info → mteb-2.1.7.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 10080,
4
+ "number_of_characters": 11742019,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 9897099,
7
+ "min_text_length": 1,
8
+ "average_text_length": 1141.0074936592114,
9
+ "max_text_length": 7337,
10
+ "unique_texts": 8624
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 1844920,
15
+ "min_text_length": 252,
16
+ "average_text_length": 1312.176386913229,
17
+ "max_text_length": 6050,
18
+ "unique_texts": 1298
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 1406,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.0,
25
+ "max_relevant_docs_per_query": 1,
26
+ "unique_relevant_docs": 1406
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 3956,
4
+ "number_of_characters": 6345348,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 6337710,
7
+ "min_text_length": 144,
8
+ "average_text_length": 1744.483897605285,
9
+ "max_text_length": 8480,
10
+ "unique_texts": 3593
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 7638,
15
+ "min_text_length": 3,
16
+ "average_text_length": 23.647058823529413,
17
+ "max_text_length": 89,
18
+ "unique_texts": 323
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 12334,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 38.18575851393189,
25
+ "max_relevant_docs_per_query": 475,
26
+ "unique_relevant_docs": 3128
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 26657,
4
+ "number_of_characters": 34261482,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 34181168,
7
+ "min_text_length": 10,
8
+ "average_text_length": 1332.2355692403632,
9
+ "max_text_length": 9275,
10
+ "unique_texts": 25656
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 80314,
15
+ "min_text_length": 17,
16
+ "average_text_length": 80.314,
17
+ "max_text_length": 227,
18
+ "unique_texts": 1000
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 4928,
23
+ "min_relevant_docs_per_query": 27,
24
+ "average_relevant_docs_per_query": 4.928,
25
+ "max_relevant_docs_per_query": 30,
26
+ "unique_relevant_docs": 25657
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 5483,
4
+ "number_of_characters": 8526662,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 8496576,
7
+ "min_text_length": 228,
8
+ "average_text_length": 1639.3162261238665,
9
+ "max_text_length": 9187,
10
+ "unique_texts": 5183
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 30086,
15
+ "min_text_length": 24,
16
+ "average_text_length": 100.28666666666666,
17
+ "max_text_length": 228,
18
+ "unique_texts": 300
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 339,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.13,
25
+ "max_relevant_docs_per_query": 5,
26
+ "unique_relevant_docs": 283
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -49,7 +49,8 @@ class DutchBookReviewSentimentClassificationV2(AbsTaskClassification):
49
49
  metadata = TaskMetadata(
50
50
  name="DutchBookReviewSentimentClassification.v2",
51
51
  description="""A Dutch book review for sentiment classification.
52
- This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900)""",
52
+ This version corrects errors found in the original data. For details, see [pull request](https://github.com/embeddings-benchmark/mteb/pull/2900).
53
+ Additionally, a Dutch prompt was included.""",
53
54
  reference="https://github.com/benjaminvdb/DBRD",
54
55
  dataset={
55
56
  "path": "mteb/dutch_book_review_sentiment",
@@ -86,4 +87,7 @@ Suzan, Verberne},
86
87
  }
87
88
  """,
88
89
  adapted_from=["DutchBookReviewSentimentClassification"],
90
+ prompt={
91
+ "query": "Classificeer de gegeven boekrecensie als positieve of negatieve sentiment"
92
+ },
89
93
  )
@@ -35,4 +35,7 @@ class DutchColaClassification(AbsTaskClassification):
35
35
  year = {2024},
36
36
  }
37
37
  """,
38
+ prompt={
39
+ "query": "Classificeer de gegeven zin als grammaticaal aanvaardbaar of niet aanvaardbaar"
40
+ },
38
41
  )
@@ -34,4 +34,7 @@ class DutchGovernmentBiasClassification(AbsTaskClassification):
34
34
  year = {2025},
35
35
  }
36
36
  """,
37
+ prompt={
38
+ "query": "Classificeer het gegeven overheidsdocument als bevooroordeeld of niet bevooroordeeld"
39
+ },
37
40
  )
@@ -27,4 +27,7 @@ class DutchNewsArticlesClassification(AbsTaskClassification):
27
27
  dialect=[],
28
28
  sample_creation="found",
29
29
  bibtex_citation="",
30
+ prompt={
31
+ "query": "Classificeer het gegeven nieuwsartikel in het juiste onderwerp of thema"
32
+ },
30
33
  )
@@ -27,6 +27,9 @@ class DutchSarcasticHeadlinesClassification(AbsTaskClassification):
27
27
  dialect=[],
28
28
  sample_creation="found",
29
29
  bibtex_citation="""""",
30
+ prompt={
31
+ "query": "Classificeer de gegeven krantenkop als sarcastisch of niet sarcastisch"
32
+ },
30
33
  )
31
34
 
32
35
  def dataset_transform(self):
@@ -38,4 +38,7 @@ class IconclassClassification(AbsTaskClassification):
38
38
  year = {2023},
39
39
  }
40
40
  """,
41
+ prompt={
42
+ "query": "Classificeer de gegeven titel van het kunstwerk in het juiste onderwerp of thema"
43
+ },
41
44
  )
@@ -35,4 +35,7 @@ class OpenTenderClassification(AbsTaskClassification):
35
35
  year = {2025},
36
36
  }
37
37
  """,
38
+ prompt={
39
+ "query": "Classificeer de gegeven aanbestedingsbeschrijving in het juiste onderwerp of thema"
40
+ },
38
41
  )
@@ -37,6 +37,9 @@ class VaccinChatNLClassification(AbsTaskClassification):
37
37
  year = {2022},
38
38
  }
39
39
  """,
40
+ prompt={
41
+ "query": "Gegeven een gebruikersuiting als query, bepaal de gebruikersintenties"
42
+ },
40
43
  )
41
44
 
42
45
  def dataset_transform(self):
@@ -28,6 +28,9 @@ class DutchNewsArticlesClusteringP2P(AbsTaskClustering):
28
28
  dialect=[],
29
29
  sample_creation="found",
30
30
  bibtex_citation="",
31
+ prompt={
32
+ "query": "Identificeer de hoofdcategorie van nieuwsartikelen op basis van de titels en de inhoud"
33
+ },
31
34
  )
32
35
 
33
36
  def dataset_transform(self):
@@ -28,6 +28,9 @@ class DutchNewsArticlesClusteringS2S(AbsTaskClustering):
28
28
  dialect=[],
29
29
  sample_creation="found",
30
30
  bibtex_citation="",
31
+ prompt={
32
+ "query": "Identificeer de hoofdcategorie van nieuwsartikelen op basis van de titels"
33
+ },
31
34
  )
32
35
 
33
36
  def dataset_transform(self):
@@ -38,6 +38,9 @@ class IconclassClusteringS2S(AbsTaskClustering):
38
38
  year = {2023},
39
39
  }
40
40
  """,
41
+ prompt={
42
+ "query": "Identificeer het onderwerp of thema van kunstwerken op basis van de titels"
43
+ },
41
44
  )
42
45
 
43
46
  def dataset_transform(self):
@@ -38,6 +38,9 @@ class OpenTenderClusteringP2P(AbsTaskClustering):
38
38
  year = {2025},
39
39
  }
40
40
  """,
41
+ prompt={
42
+ "query": "Identificeer de hoofdcategorie van aanbestedingen op basis van de titels en beschrijvingen"
43
+ },
41
44
  )
42
45
 
43
46
  def dataset_transform(self):
@@ -38,4 +38,7 @@ class OpenTenderClusteringS2S(AbsTaskClustering):
38
38
  year = {2025},
39
39
  }
40
40
  """,
41
+ prompt={
42
+ "query": "Identificeer de hoofdcategorie van aanbestedingen op basis van de titels"
43
+ },
41
44
  )
@@ -39,6 +39,9 @@ class VABBClusteringP2P(AbsTaskClustering):
39
39
  year = {2024},
40
40
  }
41
41
  """,
42
+ prompt={
43
+ "query": "Identificeer de hoofdcategorie van wetenschappelijke artikelen op basis van de titels en abstracts"
44
+ },
42
45
  )
43
46
 
44
47
  def dataset_transform(self):
@@ -39,6 +39,9 @@ class VABBClusteringS2S(AbsTaskClustering):
39
39
  year = {2024},
40
40
  }
41
41
  """,
42
+ prompt={
43
+ "query": "Identificeer de hoofdcategorie van wetenschappelijke artikelen op basis van de titels"
44
+ },
42
45
  )
43
46
 
44
47
  def dataset_transform(self):
@@ -61,6 +61,9 @@ Yih, Scott Wen-tau},
61
61
  year = {2021},
62
62
  }
63
63
  """,
64
+ prompt={
65
+ "query": "Classificeer COVID-19-gerelateerde sociale media-berichten in alle toepasselijke desinformatiecategorieën"
66
+ },
64
67
  )
65
68
 
66
69
  def dataset_transform(self) -> None:
@@ -41,4 +41,7 @@ class VABBMultiLabelClassification(AbsTaskMultilabelClassification):
41
41
  year = {2024},
42
42
  }
43
43
  """,
44
+ prompt={
45
+ "query": "Classificeer de onderwerpen van een wetenschappelijk artikel op basis van de abstract"
46
+ },
44
47
  )
@@ -33,4 +33,7 @@ class SICKNLPairClassification(AbsTaskPairClassification):
33
33
  year = {2021},
34
34
  }
35
35
  """,
36
+ prompt={
37
+ "query": "Zoek tekst die semantisch vergelijkbaar is met de gegeven tekst."
38
+ },
36
39
  )
@@ -38,4 +38,7 @@ class XLWICNLPairClassification(AbsTaskPairClassification):
38
38
  year = {2020},
39
39
  }
40
40
  """,
41
+ prompt={
42
+ "query": "Zoek tekst die semantisch vergelijkbaar is met de gegeven tekst."
43
+ },
41
44
  )
@@ -1,4 +1,4 @@
1
- from .argu_ana_nl_retrieval import ArguAnaNL
1
+ from .argu_ana_nl_retrieval import ArguAnaNL, ArguAnaNLv2
2
2
  from .bbsard_nl_retrieval import BBSARDNLRetrieval
3
3
  from .climate_fevernl_retrieval import ClimateFEVERNL
4
4
  from .cqa_dupstack_android_nl_retrieval import CQADupstackAndroidNLRetrieval
@@ -20,12 +20,12 @@ from .fi_qa2018_nl_retrieval import FiQA2018NL
20
20
  from .hotpot_qanl_retrieval import HotpotQANL
21
21
  from .legal_qa_nl_retrieval import LegalQANLRetrieval
22
22
  from .mmarconl_retrieval import MMMARCONL
23
- from .nf_corpus_nl_retrieval import NFCorpusNL
23
+ from .nf_corpus_nl_retrieval import NFCorpusNL, NFCorpusNLv2
24
24
  from .nqnl_retrieval import NQNL
25
25
  from .open_tender_retrieval import OpenTenderRetrieval
26
26
  from .quora_nl_retrieval import QuoraNLRetrieval
27
- from .sci_fact_nl_retrieval import SciFactNL
28
- from .scidocsnl_retrieval import SCIDOCSNL
27
+ from .sci_fact_nl_retrieval import SciFactNL, SciFactNLv2
28
+ from .scidocsnl_retrieval import SCIDOCSNL, SCIDOCSNLv2
29
29
  from .touche2020_nl_retrieval import Touche2020NL
30
30
  from .treccovidnl_retrieval import TRECCOVIDNL
31
31
  from .vabb_retrieval import VABBRetrieval
@@ -37,6 +37,7 @@ __all__ = [
37
37
  "SCIDOCSNL",
38
38
  "TRECCOVIDNL",
39
39
  "ArguAnaNL",
40
+ "ArguAnaNLv2",
40
41
  "BBSARDNLRetrieval",
41
42
  "CQADupstackAndroidNLRetrieval",
42
43
  "CQADupstackEnglishNLRetrieval",
@@ -57,9 +58,12 @@ __all__ = [
57
58
  "HotpotQANL",
58
59
  "LegalQANLRetrieval",
59
60
  "NFCorpusNL",
61
+ "NFCorpusNLv2",
60
62
  "OpenTenderRetrieval",
61
63
  "QuoraNLRetrieval",
64
+ "SCIDOCSNLv2",
62
65
  "SciFactNL",
66
+ "SciFactNLv2",
63
67
  "Touche2020NL",
64
68
  "VABBRetrieval",
65
69
  ]
@@ -1,33 +1,26 @@
1
1
  from mteb.abstasks.retrieval import AbsTaskRetrieval
2
2
  from mteb.abstasks.task_metadata import TaskMetadata
3
3
 
4
-
5
- class ArguAnaNL(AbsTaskRetrieval):
6
- ignore_identical_ids = True
7
-
8
- metadata = TaskMetadata(
9
- name="ArguAna-NL",
10
- description="ArguAna involves the task of retrieval of the best counterargument to an argument. ArguAna-NL is "
11
- "a Dutch translation.",
12
- reference="https://huggingface.co/datasets/clips/beir-nl-arguana",
13
- dataset={
14
- "path": "clips/beir-nl-arguana",
15
- "revision": "4cd085d148fe2cac923bb7758d6ef585926170ba",
16
- },
17
- type="Retrieval",
18
- category="t2t",
19
- modalities=["text"],
20
- eval_splits=["test"],
21
- eval_langs=["nld-Latn"],
22
- main_score="ndcg_at_10",
23
- date=("2016-03-01", "2016-03-01"), # best guess: based on publication date
24
- domains=["Written", "Non-fiction"],
25
- task_subtypes=[],
26
- license="cc-by-sa-4.0",
27
- annotations_creators="derived",
28
- dialect=[],
29
- sample_creation="machine-translated and verified", # manually checked a small subset
30
- bibtex_citation=r"""
4
+ _argu_ana_nl_metadata = dict(
5
+ reference="https://huggingface.co/datasets/clips/beir-nl-arguana",
6
+ dataset={
7
+ "path": "clips/beir-nl-arguana",
8
+ "revision": "4cd085d148fe2cac923bb7758d6ef585926170ba",
9
+ },
10
+ type="Retrieval",
11
+ category="t2t",
12
+ modalities=["text"],
13
+ eval_splits=["test"],
14
+ eval_langs=["nld-Latn"],
15
+ main_score="ndcg_at_10",
16
+ date=("2016-03-01", "2016-03-01"), # best guess: based on publication date
17
+ domains=["Written", "Non-fiction"],
18
+ task_subtypes=[],
19
+ license="cc-by-sa-4.0",
20
+ annotations_creators="derived",
21
+ dialect=[],
22
+ sample_creation="machine-translated and verified", # manually checked a small subset
23
+ bibtex_citation=r"""
31
24
  @misc{banar2024beirnlzeroshotinformationretrieval,
32
25
  archiveprefix = {arXiv},
33
26
  author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans},
@@ -38,5 +31,31 @@ class ArguAnaNL(AbsTaskRetrieval):
38
31
  year = {2024},
39
32
  }
40
33
  """,
34
+ )
35
+
36
+
37
+ class ArguAnaNL(AbsTaskRetrieval):
38
+ ignore_identical_ids = True
39
+
40
+ metadata = TaskMetadata(
41
+ name="ArguAna-NL",
42
+ description="ArguAna involves the task of retrieval of the best counterargument to an argument. ArguAna-NL is "
43
+ "a Dutch translation.",
41
44
  adapted_from=["ArguAna"],
45
+ **_argu_ana_nl_metadata,
46
+ )
47
+
48
+
49
+ class ArguAnaNLv2(AbsTaskRetrieval):
50
+ ignore_identical_ids = True
51
+
52
+ metadata = TaskMetadata(
53
+ name="ArguAna-NL.v2",
54
+ description="ArguAna involves the task of retrieval of the best counterargument to an argument. ArguAna-NL is "
55
+ "a Dutch translation. This version adds a Dutch prompt to the dataset.",
56
+ prompt={
57
+ "query": "Gegeven een bewering, vind documenten die de bewering weerleggen"
58
+ },
59
+ adapted_from=["ArguAna-NL"],
60
+ **_argu_ana_nl_metadata,
42
61
  )
@@ -38,4 +38,7 @@ class BBSARDNLRetrieval(AbsTaskRetrieval):
38
38
  year = {2025},
39
39
  }
40
40
  """,
41
+ prompt={
42
+ "query": "Gegeven een juridische vraag, haal documenten op die kunnen helpen bij het beantwoorden van de vraag"
43
+ },
41
44
  )
@@ -27,4 +27,7 @@ class DutchNewsArticlesRetrieval(AbsTaskRetrieval):
27
27
  dialect=[],
28
28
  sample_creation="found",
29
29
  bibtex_citation="",
30
+ prompt={
31
+ "query": "Gegeven een titel, haal het nieuwsartikel op dat het beste bij de titel past"
32
+ },
30
33
  )
@@ -36,4 +36,7 @@ class LegalQANLRetrieval(AbsTaskRetrieval):
36
36
  year = {2024},
37
37
  }
38
38
  """,
39
+ prompt={
40
+ "query": "Gegeven een juridische vraag, haal documenten op die kunnen helpen bij het beantwoorden van de vraag"
41
+ },
39
42
  )
@@ -1,31 +1,26 @@
1
1
  from mteb.abstasks.retrieval import AbsTaskRetrieval
2
2
  from mteb.abstasks.task_metadata import TaskMetadata
3
3
 
4
-
5
- class NFCorpusNL(AbsTaskRetrieval):
6
- metadata = TaskMetadata(
7
- name="NFCorpus-NL",
8
- dataset={
9
- "path": "clips/beir-nl-nfcorpus",
10
- "revision": "942953e674fd0f619ff89897abb806dc3df5dd39",
11
- },
12
- description="NFCorpus: A Full-Text Learning to Rank Dataset for Medical Information Retrieval. NFCorpus-NL is "
13
- "a Dutch translation.",
14
- reference="https://huggingface.co/datasets/clips/beir-nl-nfcorpus",
15
- type="Retrieval",
16
- category="t2t",
17
- modalities=["text"],
18
- eval_splits=["test"],
19
- eval_langs=["nld-Latn"],
20
- main_score="ndcg_at_10",
21
- date=("2016-03-01", "2016-03-01"), # best guess: based on publication date
22
- domains=["Medical", "Academic", "Written"],
23
- task_subtypes=[],
24
- license="cc-by-4.0",
25
- annotations_creators="derived",
26
- dialect=[],
27
- sample_creation="machine-translated and verified", # manually checked a small subset
28
- bibtex_citation=r"""
4
+ _nf_corpus_metadata = dict(
5
+ dataset={
6
+ "path": "clips/beir-nl-nfcorpus",
7
+ "revision": "942953e674fd0f619ff89897abb806dc3df5dd39",
8
+ },
9
+ reference="https://huggingface.co/datasets/clips/beir-nl-nfcorpus",
10
+ type="Retrieval",
11
+ category="t2t",
12
+ modalities=["text"],
13
+ eval_splits=["test"],
14
+ eval_langs=["nld-Latn"],
15
+ main_score="ndcg_at_10",
16
+ date=("2016-03-01", "2016-03-01"), # best guess: based on publication date
17
+ domains=["Medical", "Academic", "Written"],
18
+ task_subtypes=[],
19
+ license="cc-by-4.0",
20
+ annotations_creators="derived",
21
+ dialect=[],
22
+ sample_creation="machine-translated and verified", # manually checked a small subset
23
+ bibtex_citation=r"""
29
24
  @misc{banar2024beirnlzeroshotinformationretrieval,
30
25
  archiveprefix = {arXiv},
31
26
  author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans},
@@ -36,5 +31,27 @@ class NFCorpusNL(AbsTaskRetrieval):
36
31
  year = {2024},
37
32
  }
38
33
  """,
34
+ )
35
+
36
+
37
+ class NFCorpusNL(AbsTaskRetrieval):
38
+ metadata = TaskMetadata(
39
+ name="NFCorpus-NL",
40
+ description="NFCorpus: A Full-Text Learning to Rank Dataset for Medical Information Retrieval. NFCorpus-NL is "
41
+ "a Dutch translation.",
39
42
  adapted_from=["NFCorpus"],
43
+ **_nf_corpus_metadata,
44
+ )
45
+
46
+
47
+ class NFCorpusNLv2(AbsTaskRetrieval):
48
+ metadata = TaskMetadata(
49
+ name="NFCorpus-NL.v2",
50
+ description="NFCorpus: A Full-Text Learning to Rank Dataset for Medical Information Retrieval. NFCorpus-NL is "
51
+ "a Dutch translation. This version adds a Dutch prompt to the dataset.",
52
+ adapted_from=["NFCorpus-NL"],
53
+ prompt={
54
+ "query": "Gegeven een vraag, haal relevante documenten op die de vraag het beste beantwoorden"
55
+ },
56
+ **_nf_corpus_metadata,
40
57
  )
@@ -35,4 +35,7 @@ class OpenTenderRetrieval(AbsTaskRetrieval):
35
35
  year = {2025},
36
36
  }
37
37
  """,
38
+ prompt={
39
+ "query": "Gegeven een titel, haal de aanbestedingsbeschrijving op die het beste bij de titel past"
40
+ },
38
41
  )
@@ -1,30 +1,26 @@
1
1
  from mteb.abstasks.retrieval import AbsTaskRetrieval
2
2
  from mteb.abstasks.task_metadata import TaskMetadata
3
3
 
4
-
5
- class SciFactNL(AbsTaskRetrieval):
6
- metadata = TaskMetadata(
7
- name="SciFact-NL",
8
- dataset={
9
- "path": "clips/beir-nl-scifact",
10
- "revision": "856d8dfc294b138856bbf3042450e3782321e44e",
11
- },
12
- description="SciFactNL verifies scientific claims in Dutch using evidence from the research literature containing scientific paper abstracts.",
13
- reference="https://huggingface.co/datasets/clips/beir-nl-scifact",
14
- type="Retrieval",
15
- category="t2t",
16
- modalities=["text"],
17
- eval_splits=["test"],
18
- eval_langs=["nld-Latn"],
19
- main_score="ndcg_at_10",
20
- date=("2020-05-01", "2020-05-01"), # best guess: based on submission date
21
- domains=["Academic", "Medical", "Written"],
22
- task_subtypes=[],
23
- license="cc-by-4.0",
24
- annotations_creators="derived",
25
- dialect=[],
26
- sample_creation="machine-translated and verified", # manually checked a small subset
27
- bibtex_citation=r"""
4
+ _sci_fact_nl_metadata = dict(
5
+ dataset={
6
+ "path": "clips/beir-nl-scifact",
7
+ "revision": "856d8dfc294b138856bbf3042450e3782321e44e",
8
+ },
9
+ reference="https://huggingface.co/datasets/clips/beir-nl-scifact",
10
+ type="Retrieval",
11
+ category="t2t",
12
+ modalities=["text"],
13
+ eval_splits=["test"],
14
+ eval_langs=["nld-Latn"],
15
+ main_score="ndcg_at_10",
16
+ date=("2020-05-01", "2020-05-01"), # best guess: based on submission date
17
+ domains=["Academic", "Medical", "Written"],
18
+ task_subtypes=[],
19
+ license="cc-by-4.0",
20
+ annotations_creators="derived",
21
+ dialect=[],
22
+ sample_creation="machine-translated and verified", # manually checked a small subset
23
+ bibtex_citation=r"""
28
24
  @misc{banar2024beirnlzeroshotinformationretrieval,
29
25
  archiveprefix = {arXiv},
30
26
  author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans},
@@ -35,5 +31,27 @@ class SciFactNL(AbsTaskRetrieval):
35
31
  year = {2024},
36
32
  }
37
33
  """,
34
+ )
35
+
36
+
37
+ class SciFactNL(AbsTaskRetrieval):
38
+ metadata = TaskMetadata(
39
+ name="SciFact-NL",
40
+ description="SciFactNL verifies scientific claims in Dutch using evidence from the research literature "
41
+ "containing scientific paper abstracts.",
38
42
  adapted_from=["SciFact"],
43
+ **_sci_fact_nl_metadata,
44
+ )
45
+
46
+
47
+ class SciFactNLv2(AbsTaskRetrieval):
48
+ metadata = TaskMetadata(
49
+ name="SciFact-NL.v2",
50
+ description="SciFactNL verifies scientific claims in Dutch using evidence from the research literature "
51
+ "containing scientific paper abstracts. This version adds a Dutch prompt to the dataset.",
52
+ adapted_from=["SciFact-NL"],
53
+ prompt={
54
+ "query": "Given a scientific claim, retrieve documents that support or refute the claim"
55
+ },
56
+ **_sci_fact_nl_metadata,
39
57
  )
@@ -1,33 +1,26 @@
1
1
  from mteb.abstasks.retrieval import AbsTaskRetrieval
2
2
  from mteb.abstasks.task_metadata import TaskMetadata
3
3
 
4
-
5
- class SCIDOCSNL(AbsTaskRetrieval):
6
- metadata = TaskMetadata(
7
- name="SCIDOCS-NL",
8
- dataset={
9
- "path": "clips/beir-nl-scidocs",
10
- "revision": "4e018aa220029f9d1bd5a31de3650e322e32ea38",
11
- },
12
- description=(
13
- "SciDocs, a new evaluation benchmark consisting of seven document-level tasks ranging from citation"
14
- + " prediction, to document classification and recommendation. SciDocs-NL is a Dutch translation."
15
- ),
16
- reference="https://huggingface.co/datasets/clips/beir-nl-scidocs",
17
- type="Retrieval",
18
- category="t2t",
19
- modalities=["text"],
20
- eval_splits=["test"],
21
- eval_langs=["nld-Latn"],
22
- main_score="ndcg_at_10",
23
- date=("2020-05-01", "2020-05-01"), # best guess: based on submission date
24
- domains=["Academic", "Written", "Non-fiction"],
25
- task_subtypes=[],
26
- license="cc-by-sa-4.0",
27
- annotations_creators="derived",
28
- dialect=[],
29
- sample_creation="machine-translated and verified", # manually checked a small subset
30
- bibtex_citation=r"""
4
+ _scidocsnl_metadata = dict(
5
+ dataset={
6
+ "path": "clips/beir-nl-scidocs",
7
+ "revision": "4e018aa220029f9d1bd5a31de3650e322e32ea38",
8
+ },
9
+ reference="https://huggingface.co/datasets/clips/beir-nl-scidocs",
10
+ type="Retrieval",
11
+ category="t2t",
12
+ modalities=["text"],
13
+ eval_splits=["test"],
14
+ eval_langs=["nld-Latn"],
15
+ main_score="ndcg_at_10",
16
+ date=("2020-05-01", "2020-05-01"), # best guess: based on submission date
17
+ domains=["Academic", "Written", "Non-fiction"],
18
+ task_subtypes=[],
19
+ license="cc-by-sa-4.0",
20
+ annotations_creators="derived",
21
+ dialect=[],
22
+ sample_creation="machine-translated and verified", # manually checked a small subset
23
+ bibtex_citation=r"""
31
24
  @misc{banar2024beirnlzeroshotinformationretrieval,
32
25
  archiveprefix = {arXiv},
33
26
  author = {Nikolay Banar and Ehsan Lotfi and Walter Daelemans},
@@ -38,5 +31,29 @@ class SCIDOCSNL(AbsTaskRetrieval):
38
31
  year = {2024},
39
32
  }
40
33
  """,
34
+ )
35
+
36
+
37
+ class SCIDOCSNL(AbsTaskRetrieval):
38
+ metadata = TaskMetadata(
39
+ name="SCIDOCS-NL",
40
+ description="SciDocs, a new evaluation benchmark consisting of seven document-level tasks ranging from "
41
+ "citation prediction, to document classification and recommendation. SciDocs-NL is a Dutch "
42
+ "translation.",
41
43
  adapted_from=["SCIDOCS"],
44
+ **_scidocsnl_metadata,
45
+ )
46
+
47
+
48
+ class SCIDOCSNLv2(AbsTaskRetrieval):
49
+ metadata = TaskMetadata(
50
+ name="SCIDOCS-NL.v2",
51
+ description="SciDocs, a new evaluation benchmark consisting of seven document-level tasks ranging from "
52
+ "citation prediction, to document classification and recommendation. SciDocs-NL is a Dutch "
53
+ "translation. This version adds a Dutch prompt to the dataset.",
54
+ adapted_from=["SCIDOCS-NL"],
55
+ **_scidocsnl_metadata,
56
+ prompt={
57
+ "query": "Gegeven de titel van een wetenschappelijk artikel, haal de abstracts op van artikelen die door het gegeven artikel worden geciteerd"
58
+ },
42
59
  )
@@ -38,4 +38,7 @@ class VABBRetrieval(AbsTaskRetrieval):
38
38
  year = {2024},
39
39
  }
40
40
  """,
41
+ prompt={
42
+ "query": "Gegeven een titel, haal de wetenschappelijke abstract op die het beste bij de titel past"
43
+ },
41
44
  )
@@ -35,6 +35,7 @@ class SICKNLSTS(AbsTaskSTS):
35
35
  year = {2021},
36
36
  }
37
37
  """,
38
+ prompt={"query": "Haal semantisch vergelijkbare tekst op"},
38
39
  )
39
40
 
40
41
  min_score = 0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mteb
3
- Version: 2.1.6
3
+ Version: 2.1.7
4
4
  Summary: Massive Text Embedding Benchmark
5
5
  Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
6
6
  Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
@@ -974,6 +974,7 @@ mteb/descriptive_stats/Retrieval/AppsRetrieval.json,sha256=aDBlI0xzeOIFUn6a7ylFI
974
974
  mteb/descriptive_stats/Retrieval/ArguAna-Fa.json,sha256=0Nug1fXP5u4mue6aLgHHilOoUbmI9s4491NTTbkqLsw,993
975
975
  mteb/descriptive_stats/Retrieval/ArguAna-Fa.v2.json,sha256=NNCJfL5SpqUn5JHdWr4QiIKAnm6qEIdh9kDEIHp-37U,993
976
976
  mteb/descriptive_stats/Retrieval/ArguAna-NL.json,sha256=MJfSGq8QAqbL_mjQXqtYGoxV2u0O_sIvc0ywHdlhO1c,994
977
+ mteb/descriptive_stats/Retrieval/ArguAna-NL.v2.json,sha256=MJfSGq8QAqbL_mjQXqtYGoxV2u0O_sIvc0ywHdlhO1c,994
977
978
  mteb/descriptive_stats/Retrieval/ArguAna-PL.json,sha256=WtaqLTUsMBoobvdwyqmTEMB2FFc9ULgrdLX7wufn7DU,994
978
979
  mteb/descriptive_stats/Retrieval/ArguAna-VN.json,sha256=BAAAPBe_vwIKGFbmy_pVaKSiaAtsHP_UdeVB8lhzxMU,994
979
980
  mteb/descriptive_stats/Retrieval/ArguAna.json,sha256=pX57RjspcrVBDsf4iHDI5zJcQpoqfPQutSGtwkLmPW0,995
@@ -1206,6 +1207,7 @@ mteb/descriptive_stats/Retrieval/MrTidyRetrieval.json,sha256=X2Jp5nByCv_UNmeeHOp
1206
1207
  mteb/descriptive_stats/Retrieval/MultiLongDocRetrieval.json,sha256=MGBeQevI03sxAK3qldfye3yRxQ7O5URV90NFyCNUr_k,33239
1207
1208
  mteb/descriptive_stats/Retrieval/NFCorpus-Fa.json,sha256=rqHqn43MBaA4cni5FbCD5CK_TmQ5z3SiXDmfGx7GIUg,1002
1208
1209
  mteb/descriptive_stats/Retrieval/NFCorpus-NL.json,sha256=x5eYhzWMKToA0ayxP3h10FyegCrbirrCywRQCzMI9SM,1003
1210
+ mteb/descriptive_stats/Retrieval/NFCorpus-NL.v2.json,sha256=x5eYhzWMKToA0ayxP3h10FyegCrbirrCywRQCzMI9SM,1003
1209
1211
  mteb/descriptive_stats/Retrieval/NFCorpus-PL.json,sha256=gY7QM-fjnAnaMdfdRtHaMLzugzUDKMGM-xMomtuq2Qg,1005
1210
1212
  mteb/descriptive_stats/Retrieval/NFCorpus-VN.json,sha256=S-oi9KTAmpfqWbN7aAfh6_E2QP34l9FT8c6x-UOKi4I,1002
1211
1213
  mteb/descriptive_stats/Retrieval/NFCorpus.json,sha256=bRUA4qS0L3UekskKHRRVHsV28EPMQJ3i5CuCY6FA0J8,1004
@@ -1278,6 +1280,7 @@ mteb/descriptive_stats/Retrieval/RuSciBenchCociteRetrieval.json,sha256=_Op8mcXjv
1278
1280
  mteb/descriptive_stats/Retrieval/SCIDOCS-Fa.json,sha256=CAXjSpSoy3V3TQM2PKtZtumi3fCUizc8Dlb89VEa620,986
1279
1281
  mteb/descriptive_stats/Retrieval/SCIDOCS-Fa.v2.json,sha256=G7sucOO-wWsSV0oP_jhf3AKrv5MItE9P7Xauq0M4-_I,987
1280
1282
  mteb/descriptive_stats/Retrieval/SCIDOCS-NL.json,sha256=qYFAgsJ7FT-qDLUStkHTmAu46PxesfIVEGU-DxEpf3c,987
1283
+ mteb/descriptive_stats/Retrieval/SCIDOCS-NL.v2.json,sha256=qYFAgsJ7FT-qDLUStkHTmAu46PxesfIVEGU-DxEpf3c,987
1281
1284
  mteb/descriptive_stats/Retrieval/SCIDOCS-PL.json,sha256=-N7x2MkyRMCbN2fa3D0cnNbhZftGQt1BdiZezYo7fm0,988
1282
1285
  mteb/descriptive_stats/Retrieval/SCIDOCS-VN.json,sha256=mzaXUR3KxBZBZoJ-hImEz9x6XEX-cu_WyKlb0ccx2mQ,988
1283
1286
  mteb/descriptive_stats/Retrieval/SCIDOCS.json,sha256=Xa2HtyeuWVV9xJOT1YwhrM32RubiMql1lkz-d1moKSc,988
@@ -1288,6 +1291,7 @@ mteb/descriptive_stats/Retrieval/SadeemQuestionRetrieval.json,sha256=xvMLI0XiyWb
1288
1291
  mteb/descriptive_stats/Retrieval/SciFact-Fa.json,sha256=BZHK8KUj5ShBzfomQxUbH8GBVpWYpepwdNaOZ1DHUE0,988
1289
1292
  mteb/descriptive_stats/Retrieval/SciFact-Fa.v2.json,sha256=1U8zTDU24TBaZ2cVX6puKT_oQx_vdLBeVz6MRswOrJE,976
1290
1293
  mteb/descriptive_stats/Retrieval/SciFact-NL.json,sha256=ncSxx1APRy-i0Sh_x7uEmJAPCSm9cRgLBbyUd02t0QI,989
1294
+ mteb/descriptive_stats/Retrieval/SciFact-NL.v2.json,sha256=ncSxx1APRy-i0Sh_x7uEmJAPCSm9cRgLBbyUd02t0QI,989
1291
1295
  mteb/descriptive_stats/Retrieval/SciFact-PL.json,sha256=UwPs1wwErZcNU6y5sNbKpuOOTnvXSGhgQY8IUKvjsas,977
1292
1296
  mteb/descriptive_stats/Retrieval/SciFact-VN.json,sha256=iUldliZZHxc3sEbMVqmjWk9Z36-WhMJSvIk9Ep57DH0,1002
1293
1297
  mteb/descriptive_stats/Retrieval/SciFact.json,sha256=1Gh0ph7_m5oc5hjlKw7KvslEd4qgJL8NgxtEkERTNXw,987
@@ -1769,14 +1773,14 @@ mteb/tasks/classification/mya/myanmar_news.py,sha256=fRDQBaSnNSLGIDU7HBz8xlHkIcc
1769
1773
  mteb/tasks/classification/nep/__init__.py,sha256=Sj3bPg2KXOui4nwgVhSdz6JUpjadQJ1ZJjxxpMyMLfQ,176
1770
1774
  mteb/tasks/classification/nep/nepali_news_classification.py,sha256=FL6IJTixczeO4xTCswSNdW2OLvqY3d7syEBo8BugN9s,3661
1771
1775
  mteb/tasks/classification/nld/__init__.py,sha256=a_YlxI4tL8joDa1RF-ctbA4kw_8kHhM0sWBNtX1FTcQ,1037
1772
- mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py,sha256=5JoP1APtPmAeyV_uLsmQzVw6rKTolz5WX-HJ1ugoZCc,3389
1773
- mteb/tasks/classification/nld/dutch_cola_classification.py,sha256=tV6wdrNToJMI4QL-Ujh79tDiazNVnpzcSvzA1eQWGXY,1471
1774
- mteb/tasks/classification/nld/dutch_government_bias_classification.py,sha256=Rqrv1HsrvKwI7bZctwPkKF5IUT9uLaoiiijwcGHqzPc,1458
1775
- mteb/tasks/classification/nld/dutch_news_articles_classification.py,sha256=VwAE5zj5Z6yO31k66YqoJOUEWrNzu9-nPdufGwDZpJg,1220
1776
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py,sha256=o9rpMS-ZovTNniGL1QhiZKpZS_zzKSrP--WY8TXIlg0,1447
1777
- mteb/tasks/classification/nld/iconclass_classification.py,sha256=t74aHdFGTbEa2tgeOc-6fpGNeC-Sd1BryzKVecK0kas,1523
1778
- mteb/tasks/classification/nld/open_tender_classification.py,sha256=PknkVwUlGWYe8dhzNe54noZhZWXGpUyj7zPii5lOWN0,1391
1779
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py,sha256=wRruABPcWdm_JzX_LkKCTpyOjK35tbY7KUB3UL-RTSc,1800
1776
+ mteb/tasks/classification/nld/dutch_book_review_sentiment_classification.py,sha256=eFCFFSUfRwXvXVaf6C-ntPs_DiXa-6P4uuoB_3ZEb00,3566
1777
+ mteb/tasks/classification/nld/dutch_cola_classification.py,sha256=_mRNluXpo91ChJXIOGwRNsimQqotmhnk0dwW4DZjEX0,1601
1778
+ mteb/tasks/classification/nld/dutch_government_bias_classification.py,sha256=pDU6zS7u69Pz1AiEAT0na-3haUAJtvMqywFgDoqKImU,1594
1779
+ mteb/tasks/classification/nld/dutch_news_articles_classification.py,sha256=OU0C_k06iIP3EZ_8PMEeRk29q_Fp0G64j1jRwHNGFXs,1343
1780
+ mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py,sha256=_uSrF4qTwiiPq0x3eJ3puaF0RbzqVwPwGAtKLywBB38,1569
1781
+ mteb/tasks/classification/nld/iconclass_classification.py,sha256=YlrA74ePG5ijAAj-wwIU-Gom5j5cyWDSCPYbVYLspd8,1655
1782
+ mteb/tasks/classification/nld/open_tender_classification.py,sha256=ObtvxZjMhzJkaSOLniyWFtHgVdIOFBq7C84_VTRLT50,1525
1783
+ mteb/tasks/classification/nld/vaccin_chat_nl_classification.py,sha256=QjYdEFbDMGCM5PhKR1ydpErTj35ttnOjg0Q7sg3vlo0,1921
1780
1784
  mteb/tasks/classification/nob/__init__.py,sha256=axgfxU9oueXuBJgbnyb0B5coAqxiCv543SsMEYcQm7o,366
1781
1785
  mteb/tasks/classification/nob/no_rec_classification.py,sha256=xXetGpPz_juq31cSsJnO6RZ5oyCfi080c_jjmPQRxFM,4186
1782
1786
  mteb/tasks/classification/nob/norwegian_parliament_classification.py,sha256=ztJSIufWm3WCwg3Is00NN4tIOuRVBFkiTFYM7bsxlb8,4007
@@ -1913,13 +1917,13 @@ mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py,sha256=4UfRF82DLNLLvD
1913
1917
  mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py,sha256=a8uu1Eiq4_M-U28eBC511Y3UNCHZVK64oZtVmAuk7Wo,8574
1914
1918
  mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py,sha256=XgMqPZ74tWjxU169qN8ri_aZVQ_kaR6xFVNJYyWBt4E,4384
1915
1919
  mteb/tasks/clustering/nld/__init__.py,sha256=_KwG_aTmQpSAek4clgtkV7MQ5dpUOmbgIQWZoux5T-w,682
1916
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py,sha256=UOJ7eLoYAoGgW6iAhmJfFzxZTAXO8f8oZhKHuVJzwBw,1442
1917
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py,sha256=undeyAivREAZQOmG3beegrQZVmPAII5rDfxIk8ylhvQ,1443
1918
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py,sha256=yvzLRLXYRQ2pz-w2zWiuNJwPJT7NwN-17BgDnHmUui8,1739
1919
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py,sha256=0MQ3PcrbuKh-1JTFPoh-KZcV96kDBLz6-ujjsrYFZ3w,1910
1920
- mteb/tasks/clustering/nld/open_tender_clustering_s2s.py,sha256=5zp1upeLZGKy3sCStEVjdhISGaEcBFO_a0_Ut45VXbQ,1591
1921
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py,sha256=ivHWLWW9ZP-EKoJcb63iB-J5w-GnaNn0cf8t1fMDwRk,2041
1922
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py,sha256=qbOdO6U_hf19i5sGO_X336ZJ6wnCGt3I5KnndsgtVNQ,2056
1920
+ mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py,sha256=xFiTQPZosUR8iYwHG3T1-SGwtMReMg5ofQvkrvIHjiE,1580
1921
+ mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py,sha256=07i07z9-Iu6d_PGtS4uFBNGte0PXqWmif-x_B5PxndE,1568
1922
+ mteb/tasks/clustering/nld/iconclass_clustering_s2s.py,sha256=k1S7Cx3n9kYg2jmQGydaYFC6zbb-D--S6hbxYQ6bIVg,1865
1923
+ mteb/tasks/clustering/nld/open_tender_clustering_p2p.py,sha256=Yek6Y9ITCJ95Yhg48ryItfxx0e_Um2s8OtBvv2zZ0YU,2052
1924
+ mteb/tasks/clustering/nld/open_tender_clustering_s2s.py,sha256=9DlFx_VQHFrRR5Gi6zDg0OYnmxeXYSPMlFKpFhjFvco,1715
1925
+ mteb/tasks/clustering/nld/vabb_clustering_p2p.py,sha256=TDGBUwN6AAQ8U0S0Whp5OiU0_XAsNsonrCeqxNnYFCg,2191
1926
+ mteb/tasks/clustering/nld/vabb_clustering_s2s.py,sha256=2UXW5pk95meD5FNHuMY4S1mhlaPl0NfrKDAV9N4_uCY,2193
1923
1927
  mteb/tasks/clustering/nob/__init__.py,sha256=de-t3amIyZAo0iPjy33xVMFKCQlcDNmS4M8dgfWsr1U,503
1924
1928
  mteb/tasks/clustering/nob/snl_clustering.py,sha256=n4-cHyAgJUEckqvEO1fk-o4rdXeIALNe2-enpvW4M9o,3580
1925
1929
  mteb/tasks/clustering/nob/snl_hierarchical_clustering.py,sha256=OfdGmodK0eHTQMLo22SMI4FxM47wUCJmYYL8OOPqrCg,3328
@@ -1988,8 +1992,8 @@ mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py,sha256=s
1988
1992
  mteb/tasks/multilabel_classification/multilingual/__init__.py,sha256=gYOJftJq07gP6SH5QMn-SZQ83MQINfWlz1fvFRDdqmI,139
1989
1993
  mteb/tasks/multilabel_classification/multilingual/multi_eurlex_multilabel_classification.py,sha256=vfnIfW_oOy8dnrXd2zKBNmhTfg8ydVtYIJqZxRMsqr0,2405
1990
1994
  mteb/tasks/multilabel_classification/nld/__init__.py,sha256=84hVtfQAZNYt1lGzf0Oc_Hrx6vBKHjN2L_h2ilcIKRk,297
1991
- mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py,sha256=UJ5tEKTU-WjcwZnToejxqw1bcaIfEIKhZY5ZyFnO4FU,2927
1992
- mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py,sha256=KPhsIv8ffktfyuZxjwi4_ymnjsz7I-ZE2P5eC_Lul1w,1802
1995
+ mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py,sha256=LgRBPVW6gMHtlpfYDs4TTl7E1QJYNOKeuH1kszFjNrM,3085
1996
+ mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py,sha256=_eapD0SUYG8ZUhnc3-NmN86Qer8qkTGRnrSn_T-Ch5Y,1939
1993
1997
  mteb/tasks/multilabel_classification/por/__init__.py,sha256=mJW70APO6ofl6XiPEbsRgL1qVi_RMsWpJwHrdx5_wmw,136
1994
1998
  mteb/tasks/multilabel_classification/por/brazilian_toxic_tweets_classification.py,sha256=m1jA33p_MVNoYRptNuVfd-YEPTc1G3EcCFB836YFlMk,1975
1995
1999
  mteb/tasks/multilabel_classification/rus/__init__.py,sha256=skzrIGJ4giqi0tIiwCZux3JAYDlmu_vYZWrEBlusC6Q,355
@@ -2037,8 +2041,8 @@ mteb/tasks/pair_classification/multilingual/rte3.py,sha256=ZR3kpz8Y2AxyLe0uIYORo
2037
2041
  mteb/tasks/pair_classification/multilingual/x_stance.py,sha256=NPLzF8j_d_Y8kLocfIr8qaeJzJBwWE-1uveXxmGYI8M,1536
2038
2042
  mteb/tasks/pair_classification/multilingual/xnli.py,sha256=q7erc3tCbamua46CuhJc36jPKOoIs4KCzD91zBPG-Y4,4677
2039
2043
  mteb/tasks/pair_classification/nld/__init__.py,sha256=H65VNZnmBDsyupF4PtYVz-PwYD9b0xXb-9e6ODK2-DM,214
2040
- mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py,sha256=mk4P6iZU6x9PenULz1b60AVV6VocXT9JLpnq8ZT-fy8,1343
2041
- mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py,sha256=Q-ObZiYkPyhqK3cygWYUjCpHznIVcKcR5VZ-wF7onfM,1890
2044
+ mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py,sha256=l420srn-nm1wbo361ogCU-Fq2B0Gv97FiN9UXSUrElY,1459
2045
+ mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py,sha256=YZ9yG4Lyh9jd7DgbBzo0sd7QNmjZZ64eR7rMiA0STxE,2006
2042
2046
  mteb/tasks/pair_classification/pol/__init__.py,sha256=3bEzEe4yom2SL3XaqNwSs_0eCv6LKFEAoOOoD9FK-gk,110
2043
2047
  mteb/tasks/pair_classification/pol/polish_pc.py,sha256=vUkkYqewzNcRQkZ_wLM63xH21PKtBGhiVGLPas5XN6o,6782
2044
2048
  mteb/tasks/pair_classification/por/__init__.py,sha256=E_dD1BZfjS0gBjzmkhUnl8SrqiGegLRfPhcItUAn6b8,104
@@ -2344,9 +2348,9 @@ mteb/tasks/retrieval/multilingual/x_market_retrieval.py,sha256=vp1Q5al9swuoChpbm
2344
2348
  mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py,sha256=o5rhMeRf9HCZQxDKU9SIibqQSR5pw4bmYV8I5nALmQ4,3971
2345
2349
  mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py,sha256=IUMlMB1G1MbYOg9q81FPTk1pKnnLGS-WLgypehOOXqQ,5123
2346
2350
  mteb/tasks/retrieval/multilingual/xpqa_retrieval.py,sha256=7ZL1cDK8OLvRwqC5r1dJKiQUkJP27RvuA_XX_Oaa--E,3029
2347
- mteb/tasks/retrieval/nld/__init__.py,sha256=AmiOwoaf7N8uElM8w3qRBttTVDmHE99SaTNz8kxJ7Pw,2668
2348
- mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py,sha256=R65AvhmxvXmtLiguNBTKNLMh4Q-s7o9UWoTR-3x2_GU,1534
2349
- mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py,sha256=0SAHmPvNwR7AzeLiZDduN7GMguS6HZrhVG-l-VOz79k,1517
2351
+ mteb/tasks/retrieval/nld/__init__.py,sha256=kQ7eR-n63XTBnzK1lHexfsCPjJWeg2OpLi5i6M3HSDc,2798
2352
+ mteb/tasks/retrieval/nld/argu_ana_nl_retrieval.py,sha256=c3O3PWvv-XpHE3YNv0EGzPbR71VnSq9RhOsT57z5iRY,2048
2353
+ mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py,sha256=4QUFjAer2WapzLZUNOUTnjZwgsKoCFi7rqX1UJnneOU,1669
2350
2354
  mteb/tasks/retrieval/nld/climate_fevernl_retrieval.py,sha256=VQ7Z3GpmIdhM47e5Vz3p7bSzJaYBKvVf8SX9VvvLlEw,1577
2351
2355
  mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py,sha256=A6bTYkteMZWUSf_ocJ8bL_4PyCMeq34vavvNGuIWt-Q,2618
2352
2356
  mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py,sha256=6q7yzUe4Tw-BGi_lQPXMBgMMO5tqZ-y7n0NdzQ0Pm-c,2618
@@ -2361,21 +2365,21 @@ mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py,sha256=JgQsDdqTth9EFR
2361
2365
  mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py,sha256=A_eHnC2bgmNOM_QfJJYh_salrHENYlwjmZ4A1hFmzJ4,2600
2362
2366
  mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py,sha256=Yo0NBvNn8BJWuoNWAGvAI-2khVSreqa4XXX0r5LZ7wo,2596
2363
2367
  mteb/tasks/retrieval/nld/db_pedia_nl_retrieval.py,sha256=bhd2ESL-roRgvDDB4cAKDmusYmOEJaJGfMfuMzFLVcw,1633
2364
- mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py,sha256=ds0_AJGTVBKr7KYV3k2H9UfF0WcmM9gDuYg8_sbvZVY,1195
2368
+ mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py,sha256=RfQXg-GNSeL6ZklO3YKJ2dKG8BOCht-3mfCg94Q2iU0,1323
2365
2369
  mteb/tasks/retrieval/nld/fevernl_retrieval.py,sha256=LZ8xofhy8xINPWdV8QYvmY1sQPtb8taHLIQ8iKJ34Iw,1694
2366
2370
  mteb/tasks/retrieval/nld/fi_qa2018_nl_retrieval.py,sha256=Qfd2Wdnmq99Onb4iCShAc0jPQ_bTreSkRShhQWqfkJA,1503
2367
2371
  mteb/tasks/retrieval/nld/hotpot_qanl_retrieval.py,sha256=xNPApN1CZzkhpvU4ZRf-rSSmljpSnFUQDVoqraW93X0,1635
2368
- mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py,sha256=vIwg1scjdbuISjZ5xNnch1mWZn4314xfHVaNbCnlUMs,1420
2372
+ mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py,sha256=w5FL9aZB3HzoZcnNwR4GdYrUoJ2nCH9UrzBCjig4RLQ,1572
2369
2373
  mteb/tasks/retrieval/nld/mmarconl_retrieval.py,sha256=iColk0qI_Ga3KHL_4aIGKK2X0S-WvqFXqDJqujzJaIA,1683
2370
- mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py,sha256=wp2ylUbflMg8BAocMgdDxgDRn9wNxIoMf3i8vc_L75I,1511
2374
+ mteb/tasks/retrieval/nld/nf_corpus_nl_retrieval.py,sha256=6kLUAvs7w5_aximrHC93fDh_thTXppr15BnBzPH5P_E,2008
2371
2375
  mteb/tasks/retrieval/nld/nqnl_retrieval.py,sha256=M4olYlutXu2VZUA5uMF-uga7V53GivPupyTKs4jy_bw,1394
2372
- mteb/tasks/retrieval/nld/open_tender_retrieval.py,sha256=aSsge7976bwUxr2xg7q1ilamoA8WL23tpGlOTQsAsD8,1383
2376
+ mteb/tasks/retrieval/nld/open_tender_retrieval.py,sha256=lwlGFoFkyxw7EI6tvV2RqBWGU1R6Syt-0FiRg8Qdz7c,1522
2373
2377
  mteb/tasks/retrieval/nld/quora_nl_retrieval.py,sha256=u7v-huChD-HymugAzG0fawTVUntVOyiDdmmRxC0d0Pw,1600
2374
- mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py,sha256=g1e8xjqNHboZwZpHQ5l4GYCYYfSoyx7xELvDaIWPfUA,1505
2375
- mteb/tasks/retrieval/nld/scidocsnl_retrieval.py,sha256=z9W0fb8L-yUoIp2ae0tsVoCxh_512Y_nbiB1YGJEiiA,1617
2378
+ mteb/tasks/retrieval/nld/sci_fact_nl_retrieval.py,sha256=uRE-Fcfmu8BJxKgYu0p8UL58Kdue9Vv9Yove3ZeIzl4,2021
2379
+ mteb/tasks/retrieval/nld/scidocsnl_retrieval.py,sha256=JhQYZX-DthiYprxyzJNMETRGvRtAQwWnR0qfmC2FPJo,2226
2376
2380
  mteb/tasks/retrieval/nld/touche2020_nl_retrieval.py,sha256=nPLZxNvhTDWkIJU6i2EPY3mp0SbLn8rZEL-7GP-XwGY,1481
2377
2381
  mteb/tasks/retrieval/nld/treccovidnl_retrieval.py,sha256=d9rL10YNTUBVubdFxIVxqEhkf8tx9Iuxsp1BZ-Ctoyk,1671
2378
- mteb/tasks/retrieval/nld/vabb_retrieval.py,sha256=U_sFses437b7wVbAccsF-y7pcL3PZOgINEA2GWGOz2Y,1694
2382
+ mteb/tasks/retrieval/nld/vabb_retrieval.py,sha256=FoudYkcY4IY0PNHCvx87bjoUnJJolWVwNhq6xH9HE84,1834
2379
2383
  mteb/tasks/retrieval/nob/__init__.py,sha256=6PYJtnMhN5OtRwXWLAMu5V-3JnZnbHrLxMOk8Ir-b9w,126
2380
2384
  mteb/tasks/retrieval/nob/norquad.py,sha256=sqQLt3ajBrui2TnwvuPny5tU-aijiRUHA1kzXmF7cN8,3779
2381
2385
  mteb/tasks/retrieval/nob/snl_retrieval.py,sha256=VXouYjek_U8jCdpuvIzQ00YRIq0gJM_5lorgVFzlI6g,3143
@@ -2483,7 +2487,7 @@ mteb/tasks/sts/multilingual/sts22_crosslingual_sts.py,sha256=zf50mdAKKZZN_sU5Ga8
2483
2487
  mteb/tasks/sts/multilingual/sts_benchmark_multilingual_sts.py,sha256=QXenk9v8Mg5pNdkTmEO1X4JFp-3OCUfhHOPO3BE32OQ,1806
2484
2488
  mteb/tasks/sts/multilingual/sts_benchmark_multilingual_visual_sts.py,sha256=yH4DLkgrBk6Qg7LHLfAyyclynN_XrichBraNDbxRWl8,1855
2485
2489
  mteb/tasks/sts/nld/__init__.py,sha256=NdpfPHekoMIo9sw75Gahm_YCn8hzcVHixEStny107fk,67
2486
- mteb/tasks/sts/nld/sick_nl_sts.py,sha256=pRJUmUzHehK9sw0cT0xt6BHyX9a0fBSh9P90lfDkt5Y,1489
2490
+ mteb/tasks/sts/nld/sick_nl_sts.py,sha256=rnZ9KeM4Id_eeJJrqcYYdxoH7f6-2lr9obgiuxtHnGY,1557
2487
2491
  mteb/tasks/sts/pol/__init__.py,sha256=Ob56PuMH_-J3gH9Pygg0fdEisWYKRT_fy_D0iE2zIpo,83
2488
2492
  mteb/tasks/sts/pol/polish_sts.py,sha256=qJCw-28dZ-o3epx44Zz-u6ZLE8HIu4ocJaJRyD0JQkc,3418
2489
2493
  mteb/tasks/sts/por/__init__.py,sha256=mUc8zOCeFl456-AEqKqgLQ4_sLnt_eUqD-cbcowRu0U,107
@@ -2538,9 +2542,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
2538
2542
  mteb/types/_result.py,sha256=CRAUc5IvqI3_9SyXDwv-PWLCXwXdZem9RePeYESRtuw,996
2539
2543
  mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
2540
2544
  mteb/types/statistics.py,sha256=YwJsxTf1eaCI_RE-J37a-gK5wDeGAsmkeZKoZCFihSo,3755
2541
- mteb-2.1.6.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
2542
- mteb-2.1.6.dist-info/METADATA,sha256=hB6t2OTnS5La-oqlUBIi67CI21acqqhYkdVSnXLriGU,13573
2543
- mteb-2.1.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2544
- mteb-2.1.6.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
2545
- mteb-2.1.6.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
2546
- mteb-2.1.6.dist-info/RECORD,,
2545
+ mteb-2.1.7.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
2546
+ mteb-2.1.7.dist-info/METADATA,sha256=1h_yON1NtxXblqwMqsRrXSNF-ySHQ714gfDzZfNiuLE,13573
2547
+ mteb-2.1.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2548
+ mteb-2.1.7.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
2549
+ mteb-2.1.7.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
2550
+ mteb-2.1.7.dist-info/RECORD,,
File without changes