mteb 2.0.5__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. mteb/__init__.py +10 -1
  2. mteb/benchmarks/benchmarks/__init__.py +2 -0
  3. mteb/benchmarks/benchmarks/benchmarks.py +75 -0
  4. mteb/descriptive_stats/Classification/DutchColaClassification.json +54 -0
  5. mteb/descriptive_stats/Classification/DutchGovernmentBiasClassification.json +54 -0
  6. mteb/descriptive_stats/Classification/DutchNewsArticlesClassification.json +90 -0
  7. mteb/descriptive_stats/Classification/DutchSarcasticHeadlinesClassification.json +54 -0
  8. mteb/descriptive_stats/Classification/IconclassClassification.json +96 -0
  9. mteb/descriptive_stats/Classification/OpenTenderClassification.json +222 -0
  10. mteb/descriptive_stats/Classification/VaccinChatNLClassification.json +1068 -0
  11. mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringP2P.json +45 -0
  12. mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringS2S.json +45 -0
  13. mteb/descriptive_stats/Clustering/IconclassClusteringS2S.json +48 -0
  14. mteb/descriptive_stats/Clustering/OpenTenderClusteringP2P.json +111 -0
  15. mteb/descriptive_stats/Clustering/OpenTenderClusteringS2S.json +111 -0
  16. mteb/descriptive_stats/Clustering/VABBClusteringP2P.json +60 -0
  17. mteb/descriptive_stats/Clustering/VABBClusteringS2S.json +60 -0
  18. mteb/descriptive_stats/MultilabelClassification/CovidDisinformationNLMultiLabelClassification.json +84 -0
  19. mteb/descriptive_stats/MultilabelClassification/VABBMultiLabelClassification.json +156 -0
  20. mteb/descriptive_stats/PairClassification/SICKNLPairClassification.json +35 -0
  21. mteb/descriptive_stats/PairClassification/XLWICNLPairClassification.json +35 -0
  22. mteb/descriptive_stats/Retrieval/DutchNewsArticlesRetrieval.json +30 -0
  23. mteb/descriptive_stats/Retrieval/LegalQANLRetrieval.json +30 -0
  24. mteb/descriptive_stats/Retrieval/OpenTenderRetrieval.json +30 -0
  25. mteb/descriptive_stats/Retrieval/VABBRetrieval.json +30 -0
  26. mteb/descriptive_stats/Retrieval/bBSARDNLRetrieval.json +30 -0
  27. mteb/descriptive_stats/STS/SICK-NL-STS.json +28 -0
  28. mteb/tasks/classification/nld/__init__.py +16 -0
  29. mteb/tasks/classification/nld/dutch_cola_classification.py +38 -0
  30. mteb/tasks/classification/nld/dutch_government_bias_classification.py +37 -0
  31. mteb/tasks/classification/nld/dutch_news_articles_classification.py +30 -0
  32. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +36 -0
  33. mteb/tasks/classification/nld/iconclass_classification.py +41 -0
  34. mteb/tasks/classification/nld/open_tender_classification.py +38 -0
  35. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +46 -0
  36. mteb/tasks/clustering/__init__.py +1 -0
  37. mteb/tasks/clustering/nld/__init__.py +17 -0
  38. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +37 -0
  39. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +37 -0
  40. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +47 -0
  41. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +51 -0
  42. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +41 -0
  43. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +51 -0
  44. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +51 -0
  45. mteb/tasks/multilabel_classification/__init__.py +1 -0
  46. mteb/tasks/multilabel_classification/nld/__init__.py +9 -0
  47. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +88 -0
  48. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +44 -0
  49. mteb/tasks/pair_classification/__init__.py +1 -0
  50. mteb/tasks/pair_classification/nld/__init__.py +7 -0
  51. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +36 -0
  52. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +41 -0
  53. mteb/tasks/retrieval/nld/__init__.py +10 -0
  54. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +41 -0
  55. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +30 -0
  56. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +39 -0
  57. mteb/tasks/retrieval/nld/open_tender_retrieval.py +38 -0
  58. mteb/tasks/retrieval/nld/vabb_retrieval.py +41 -0
  59. mteb/tasks/sts/__init__.py +1 -0
  60. mteb/tasks/sts/nld/__init__.py +5 -0
  61. mteb/tasks/sts/nld/sick_nl_sts.py +41 -0
  62. {mteb-2.0.5.dist-info → mteb-2.1.0.dist-info}/METADATA +2 -204
  63. {mteb-2.0.5.dist-info → mteb-2.1.0.dist-info}/RECORD +67 -15
  64. {mteb-2.0.5.dist-info → mteb-2.1.0.dist-info}/WHEEL +0 -0
  65. {mteb-2.0.5.dist-info → mteb-2.1.0.dist-info}/entry_points.txt +0 -0
  66. {mteb-2.0.5.dist-info → mteb-2.1.0.dist-info}/licenses/LICENSE +0 -0
  67. {mteb-2.0.5.dist-info → mteb-2.1.0.dist-info}/top_level.txt +0 -0
mteb/__init__.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from importlib.metadata import version
2
2
 
3
+ from mteb import types
3
4
  from mteb.abstasks import AbsTask
4
5
  from mteb.abstasks.task_metadata import TaskMetadata
5
6
  from mteb.deprecated_evaluator import MTEB
@@ -7,7 +8,12 @@ from mteb.evaluate import evaluate
7
8
  from mteb.filter_tasks import filter_tasks
8
9
  from mteb.get_tasks import get_task, get_tasks
9
10
  from mteb.load_results import load_results
10
- from mteb.models import EncoderProtocol, SentenceTransformerEncoderWrapper
11
+ from mteb.models import (
12
+ CrossEncoderProtocol,
13
+ EncoderProtocol,
14
+ SearchProtocol,
15
+ SentenceTransformerEncoderWrapper,
16
+ )
11
17
  from mteb.models.get_model_meta import get_model, get_model_meta, get_model_metas
12
18
  from mteb.results import BenchmarkResults, TaskResult
13
19
 
@@ -21,7 +27,9 @@ __all__ = [
21
27
  "AbsTask",
22
28
  "Benchmark",
23
29
  "BenchmarkResults",
30
+ "CrossEncoderProtocol",
24
31
  "EncoderProtocol",
32
+ "SearchProtocol",
25
33
  "SentenceTransformerEncoderWrapper",
26
34
  "TaskMetadata",
27
35
  "TaskResult",
@@ -35,4 +43,5 @@ __all__ = [
35
43
  "get_task",
36
44
  "get_tasks",
37
45
  "load_results",
46
+ "types",
38
47
  ]
@@ -27,6 +27,7 @@ from mteb.benchmarks.benchmarks.benchmarks import (
27
27
  MTEB_KOR,
28
28
  MTEB_MAIN_RU,
29
29
  MTEB_MINERS_BITEXT_MINING,
30
+ MTEB_NL,
30
31
  MTEB_POL,
31
32
  MTEB_RETRIEVAL_LAW,
32
33
  MTEB_RETRIEVAL_MEDICAL,
@@ -87,6 +88,7 @@ __all__ = [
87
88
  "MTEB_KOR",
88
89
  "MTEB_MAIN_RU",
89
90
  "MTEB_MINERS_BITEXT_MINING",
91
+ "MTEB_NL",
90
92
  "MTEB_POL",
91
93
  "MTEB_RETRIEVAL_LAW",
92
94
  "MTEB_RETRIEVAL_MEDICAL",
@@ -1636,6 +1636,81 @@ BEIR_NL = Benchmark(
1636
1636
  """,
1637
1637
  )
1638
1638
 
1639
+ MTEB_NL = Benchmark(
1640
+ name="MTEB(nld, v1)",
1641
+ display_name="MTEB-NL",
1642
+ icon="https://github.com/lipis/flag-icons/raw/260c91531be024944c6514130c5defb2ebb02b7d/flags/4x3/nl.svg",
1643
+ tasks=MTEBTasks(
1644
+ get_tasks(
1645
+ languages=["nld"],
1646
+ exclusive_language_filter=True,
1647
+ tasks=[
1648
+ # Classification
1649
+ "DutchBookReviewSentimentClassification",
1650
+ "MassiveIntentClassification",
1651
+ "MassiveScenarioClassification",
1652
+ "SIB200Classification",
1653
+ "MultiHateClassification",
1654
+ "VaccinChatNLClassification",
1655
+ "DutchColaClassification",
1656
+ "DutchGovernmentBiasClassification",
1657
+ "DutchSarcasticHeadlinesClassification",
1658
+ "DutchNewsArticlesClassification",
1659
+ "OpenTenderClassification",
1660
+ "IconclassClassification",
1661
+ # # PairClassification
1662
+ "SICKNLPairClassification",
1663
+ "XLWICNLPairClassification",
1664
+ # # MultiLabelClassification
1665
+ "CovidDisinformationNLMultiLabelClassification",
1666
+ "MultiEURLEXMultilabelClassification",
1667
+ "VABBMultiLabelClassification",
1668
+ # # Clustering
1669
+ "DutchNewsArticlesClusteringS2S",
1670
+ "DutchNewsArticlesClusteringP2P",
1671
+ "SIB200ClusteringS2S",
1672
+ "VABBClusteringS2S",
1673
+ "VABBClusteringP2P",
1674
+ "OpenTenderClusteringS2S",
1675
+ "OpenTenderClusteringP2P",
1676
+ "IconclassClusteringS2S",
1677
+ # # Reranking
1678
+ "WikipediaRerankingMultilingual",
1679
+ # # Retrieval
1680
+ "ArguAna-NL",
1681
+ "SCIDOCS-NL",
1682
+ "SciFact-NL",
1683
+ "NFCorpus-NL",
1684
+ "BelebeleRetrieval",
1685
+ # "WebFAQRetrieval",
1686
+ "DutchNewsArticlesRetrieval",
1687
+ "bBSARDNLRetrieval",
1688
+ "LegalQANLRetrieval",
1689
+ "OpenTenderRetrieval",
1690
+ "VABBRetrieval",
1691
+ "WikipediaRetrievalMultilingual",
1692
+ # # STS
1693
+ "SICK-NL-STS",
1694
+ "STSBenchmarkMultilingualSTS",
1695
+ ],
1696
+ )
1697
+ ),
1698
+ description="MTEB-NL",
1699
+ reference="https://arxiv.org/abs/2509.12340",
1700
+ contacts=["nikolay-banar"],
1701
+ citation=r"""
1702
+ @misc{banar2025mtebnle5nlembeddingbenchmark,
1703
+ archiveprefix = {arXiv},
1704
+ author = {Nikolay Banar and Ehsan Lotfi and Jens Van Nooten and Cristina Arhiliuc and Marija Kliocaite and Walter Daelemans},
1705
+ eprint = {2509.12340},
1706
+ primaryclass = {cs.CL},
1707
+ title = {MTEB-NL and E5-NL: Embedding Benchmark and Models for Dutch},
1708
+ url = {https://arxiv.org/abs/2509.12340},
1709
+ year = {2025},
1710
+ }
1711
+ """,
1712
+ )
1713
+
1639
1714
  MIEB_common_tasks = [
1640
1715
  # Image Classification
1641
1716
  "Birdsnap", # fine
@@ -0,0 +1,54 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 2400,
4
+ "number_texts_intersect_with_train": null,
5
+ "text_statistics": {
6
+ "total_text_length": 92146,
7
+ "min_text_length": 5,
8
+ "average_text_length": 38.39416666666666,
9
+ "max_text_length": 138,
10
+ "unique_texts": 2400
11
+ },
12
+ "image_statistics": null,
13
+ "label_statistics": {
14
+ "min_labels_per_text": 1,
15
+ "average_label_per_text": 1.0,
16
+ "max_labels_per_text": 1,
17
+ "unique_labels": 2,
18
+ "labels": {
19
+ "1": {
20
+ "count": 1200
21
+ },
22
+ "0": {
23
+ "count": 1200
24
+ }
25
+ }
26
+ }
27
+ },
28
+ "train": {
29
+ "num_samples": 19893,
30
+ "number_texts_intersect_with_train": null,
31
+ "text_statistics": {
32
+ "total_text_length": 761416,
33
+ "min_text_length": 4,
34
+ "average_text_length": 38.27557432262605,
35
+ "max_text_length": 152,
36
+ "unique_texts": 19893
37
+ },
38
+ "image_statistics": null,
39
+ "label_statistics": {
40
+ "min_labels_per_text": 1,
41
+ "average_label_per_text": 1.0,
42
+ "max_labels_per_text": 1,
43
+ "unique_labels": 2,
44
+ "labels": {
45
+ "1": {
46
+ "count": 12604
47
+ },
48
+ "0": {
49
+ "count": 7289
50
+ }
51
+ }
52
+ }
53
+ }
54
+ }
@@ -0,0 +1,54 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 752,
4
+ "number_texts_intersect_with_train": 100,
5
+ "text_statistics": {
6
+ "total_text_length": 171956,
7
+ "min_text_length": 32,
8
+ "average_text_length": 228.66489361702128,
9
+ "max_text_length": 2746,
10
+ "unique_texts": 752
11
+ },
12
+ "image_statistics": null,
13
+ "label_statistics": {
14
+ "min_labels_per_text": 1,
15
+ "average_label_per_text": 1.0,
16
+ "max_labels_per_text": 1,
17
+ "unique_labels": 2,
18
+ "labels": {
19
+ "0.0": {
20
+ "count": 555
21
+ },
22
+ "1.0": {
23
+ "count": 197
24
+ }
25
+ }
26
+ }
27
+ },
28
+ "train": {
29
+ "num_samples": 1718,
30
+ "number_texts_intersect_with_train": null,
31
+ "text_statistics": {
32
+ "total_text_length": 390362,
33
+ "min_text_length": 18,
34
+ "average_text_length": 227.2188591385332,
35
+ "max_text_length": 2662,
36
+ "unique_texts": 1718
37
+ },
38
+ "image_statistics": null,
39
+ "label_statistics": {
40
+ "min_labels_per_text": 1,
41
+ "average_label_per_text": 1.0,
42
+ "max_labels_per_text": 1,
43
+ "unique_labels": 2,
44
+ "labels": {
45
+ "1.0": {
46
+ "count": 470
47
+ },
48
+ "0.0": {
49
+ "count": 1248
50
+ }
51
+ }
52
+ }
53
+ }
54
+ }
@@ -0,0 +1,90 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 1200,
4
+ "number_texts_intersect_with_train": 1,
5
+ "text_statistics": {
6
+ "total_text_length": 2034506,
7
+ "min_text_length": 184,
8
+ "average_text_length": 1695.4216666666666,
9
+ "max_text_length": 8825,
10
+ "unique_texts": 1200
11
+ },
12
+ "image_statistics": null,
13
+ "label_statistics": {
14
+ "min_labels_per_text": 1,
15
+ "average_label_per_text": 1.0,
16
+ "max_labels_per_text": 1,
17
+ "unique_labels": 8,
18
+ "labels": {
19
+ "Opmerkelijk": {
20
+ "count": 150
21
+ },
22
+ "Buitenland": {
23
+ "count": 150
24
+ },
25
+ "Cultuur & Media": {
26
+ "count": 150
27
+ },
28
+ "Binnenland": {
29
+ "count": 150
30
+ },
31
+ "Politiek": {
32
+ "count": 150
33
+ },
34
+ "Economie": {
35
+ "count": 150
36
+ },
37
+ "Tech": {
38
+ "count": 150
39
+ },
40
+ "Regionaal nieuws": {
41
+ "count": 150
42
+ }
43
+ }
44
+ }
45
+ },
46
+ "train": {
47
+ "num_samples": 5600,
48
+ "number_texts_intersect_with_train": null,
49
+ "text_statistics": {
50
+ "total_text_length": 9620538,
51
+ "min_text_length": 106,
52
+ "average_text_length": 1717.9532142857142,
53
+ "max_text_length": 29389,
54
+ "unique_texts": 5600
55
+ },
56
+ "image_statistics": null,
57
+ "label_statistics": {
58
+ "min_labels_per_text": 1,
59
+ "average_label_per_text": 1.0,
60
+ "max_labels_per_text": 1,
61
+ "unique_labels": 8,
62
+ "labels": {
63
+ "Cultuur & Media": {
64
+ "count": 700
65
+ },
66
+ "Binnenland": {
67
+ "count": 700
68
+ },
69
+ "Buitenland": {
70
+ "count": 700
71
+ },
72
+ "Regionaal nieuws": {
73
+ "count": 700
74
+ },
75
+ "Politiek": {
76
+ "count": 700
77
+ },
78
+ "Economie": {
79
+ "count": 700
80
+ },
81
+ "Opmerkelijk": {
82
+ "count": 700
83
+ },
84
+ "Tech": {
85
+ "count": 700
86
+ }
87
+ }
88
+ }
89
+ }
90
+ }
@@ -0,0 +1,54 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 1326,
4
+ "number_texts_intersect_with_train": null,
5
+ "text_statistics": {
6
+ "total_text_length": 82644,
7
+ "min_text_length": 17,
8
+ "average_text_length": 62.32579185520362,
9
+ "max_text_length": 117,
10
+ "unique_texts": 1326
11
+ },
12
+ "image_statistics": null,
13
+ "label_statistics": {
14
+ "min_labels_per_text": 1,
15
+ "average_label_per_text": 1.0,
16
+ "max_labels_per_text": 1,
17
+ "unique_labels": 2,
18
+ "labels": {
19
+ "0": {
20
+ "count": 826
21
+ },
22
+ "1": {
23
+ "count": 500
24
+ }
25
+ }
26
+ }
27
+ },
28
+ "train": {
29
+ "num_samples": 10609,
30
+ "number_texts_intersect_with_train": null,
31
+ "text_statistics": {
32
+ "total_text_length": 658787,
33
+ "min_text_length": 7,
34
+ "average_text_length": 62.09699311904986,
35
+ "max_text_length": 161,
36
+ "unique_texts": 10609
37
+ },
38
+ "image_statistics": null,
39
+ "label_statistics": {
40
+ "min_labels_per_text": 1,
41
+ "average_label_per_text": 1.0,
42
+ "max_labels_per_text": 1,
43
+ "unique_labels": 2,
44
+ "labels": {
45
+ "1": {
46
+ "count": 4000
47
+ },
48
+ "0": {
49
+ "count": 6609
50
+ }
51
+ }
52
+ }
53
+ }
54
+ }
@@ -0,0 +1,96 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 202,
4
+ "number_texts_intersect_with_train": null,
5
+ "text_statistics": {
6
+ "total_text_length": 11827,
7
+ "min_text_length": 6,
8
+ "average_text_length": 58.54950495049505,
9
+ "max_text_length": 403,
10
+ "unique_texts": 202
11
+ },
12
+ "image_statistics": null,
13
+ "label_statistics": {
14
+ "min_labels_per_text": 1,
15
+ "average_label_per_text": 1.0,
16
+ "max_labels_per_text": 1,
17
+ "unique_labels": 9,
18
+ "labels": {
19
+ "Geschiedenis": {
20
+ "count": 22
21
+ },
22
+ "Klassieke mythologie en Oude Geschiedenis": {
23
+ "count": 22
24
+ },
25
+ "Literatuur": {
26
+ "count": 23
27
+ },
28
+ "Natuur": {
29
+ "count": 23
30
+ },
31
+ "De mens, de mensheid in het algemeen": {
32
+ "count": 22
33
+ },
34
+ "Maatschappij, civilisatie en cultuur": {
35
+ "count": 22
36
+ },
37
+ "Abstracte idee\u00ebn en concepten": {
38
+ "count": 23
39
+ },
40
+ "Religie en magie": {
41
+ "count": 22
42
+ },
43
+ "Bijbel": {
44
+ "count": 23
45
+ }
46
+ }
47
+ }
48
+ },
49
+ "train": {
50
+ "num_samples": 945,
51
+ "number_texts_intersect_with_train": null,
52
+ "text_statistics": {
53
+ "total_text_length": 52510,
54
+ "min_text_length": 3,
55
+ "average_text_length": 55.56613756613756,
56
+ "max_text_length": 793,
57
+ "unique_texts": 945
58
+ },
59
+ "image_statistics": null,
60
+ "label_statistics": {
61
+ "min_labels_per_text": 1,
62
+ "average_label_per_text": 1.0,
63
+ "max_labels_per_text": 1,
64
+ "unique_labels": 9,
65
+ "labels": {
66
+ "Literatuur": {
67
+ "count": 105
68
+ },
69
+ "Maatschappij, civilisatie en cultuur": {
70
+ "count": 105
71
+ },
72
+ "Klassieke mythologie en Oude Geschiedenis": {
73
+ "count": 105
74
+ },
75
+ "Bijbel": {
76
+ "count": 105
77
+ },
78
+ "De mens, de mensheid in het algemeen": {
79
+ "count": 105
80
+ },
81
+ "Abstracte idee\u00ebn en concepten": {
82
+ "count": 105
83
+ },
84
+ "Natuur": {
85
+ "count": 105
86
+ },
87
+ "Geschiedenis": {
88
+ "count": 105
89
+ },
90
+ "Religie en magie": {
91
+ "count": 105
92
+ }
93
+ }
94
+ }
95
+ }
96
+ }