mteb 2.0.4__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. mteb/__init__.py +10 -1
  2. mteb/benchmarks/benchmarks/__init__.py +2 -0
  3. mteb/benchmarks/benchmarks/benchmarks.py +75 -0
  4. mteb/descriptive_stats/BitextMining/BUCC.json +70 -40
  5. mteb/descriptive_stats/Classification/DKHateClassification.json +40 -24
  6. mteb/descriptive_stats/Classification/DutchColaClassification.json +54 -0
  7. mteb/descriptive_stats/Classification/DutchGovernmentBiasClassification.json +54 -0
  8. mteb/descriptive_stats/Classification/DutchNewsArticlesClassification.json +90 -0
  9. mteb/descriptive_stats/Classification/DutchSarcasticHeadlinesClassification.json +54 -0
  10. mteb/descriptive_stats/Classification/FinancialPhrasebankClassification.json +23 -15
  11. mteb/descriptive_stats/Classification/IconclassClassification.json +96 -0
  12. mteb/descriptive_stats/Classification/ImdbClassification.json +40 -24
  13. mteb/descriptive_stats/Classification/KorHateClassification.json +23 -15
  14. mteb/descriptive_stats/Classification/OpenTenderClassification.json +222 -0
  15. mteb/descriptive_stats/Classification/VaccinChatNLClassification.json +1068 -0
  16. mteb/descriptive_stats/Clustering/ArxivClusteringP2P.json +555 -550
  17. mteb/descriptive_stats/Clustering/ArxivClusteringP2P.v2.json +546 -541
  18. mteb/descriptive_stats/Clustering/ArxivClusteringS2S.json +555 -550
  19. mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringP2P.json +45 -0
  20. mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringS2S.json +45 -0
  21. mteb/descriptive_stats/Clustering/IconclassClusteringS2S.json +48 -0
  22. mteb/descriptive_stats/Clustering/MLSUMClusteringP2P.json +2466 -2416
  23. mteb/descriptive_stats/Clustering/OpenTenderClusteringP2P.json +111 -0
  24. mteb/descriptive_stats/Clustering/OpenTenderClusteringS2S.json +111 -0
  25. mteb/descriptive_stats/Clustering/RedditClusteringP2P.json +1365 -1360
  26. mteb/descriptive_stats/Clustering/SNLClustering.json +378 -373
  27. mteb/descriptive_stats/Clustering/SwednClustering.json +28 -23
  28. mteb/descriptive_stats/Clustering/VABBClusteringP2P.json +60 -0
  29. mteb/descriptive_stats/Clustering/VABBClusteringS2S.json +60 -0
  30. mteb/descriptive_stats/Clustering/VGClustering.json +54 -49
  31. mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/WITT2IRetrieval.json +324 -204
  32. mteb/descriptive_stats/Image/Any2AnyRetrieval/MemotionI2TRetrieval.json +28 -18
  33. mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRAirbnbSyntheticRetrieval.json +334 -0
  34. mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRGitHubReadmeRetrieval.json +544 -0
  35. mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRTweetStockSyntheticsRetrieval.json +334 -0
  36. mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRWikimediaCommonsDocumentsRetrieval.json +634 -0
  37. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore2ESGReportsRetrieval.json +154 -0
  38. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore2EconomicsReportsRetrieval.json +154 -0
  39. mteb/descriptive_stats/Image/ImageClassification/Imagenet1k.json +6039 -3007
  40. mteb/descriptive_stats/Image/ZeroShotClassification/Imagenet1kZeroShot.json +3024 -3010
  41. mteb/descriptive_stats/Image/ZeroShotClassification/PatchCamelyonZeroShot.json +30 -16
  42. mteb/descriptive_stats/MultilabelClassification/CovidDisinformationNLMultiLabelClassification.json +84 -0
  43. mteb/descriptive_stats/MultilabelClassification/VABBMultiLabelClassification.json +156 -0
  44. mteb/descriptive_stats/PairClassification/SICKNLPairClassification.json +35 -0
  45. mteb/descriptive_stats/PairClassification/XLWICNLPairClassification.json +35 -0
  46. mteb/descriptive_stats/Reranking/MIRACLReranking.json +555 -479
  47. mteb/descriptive_stats/Reranking/MindSmallReranking.json +29 -25
  48. mteb/descriptive_stats/Retrieval/AlloprofRetrieval.json +25 -26
  49. mteb/descriptive_stats/Retrieval/Code1Retrieval.json +30 -0
  50. mteb/descriptive_stats/Retrieval/DanFEVER.json +25 -26
  51. mteb/descriptive_stats/Retrieval/DutchNewsArticlesRetrieval.json +30 -0
  52. mteb/descriptive_stats/Retrieval/EnglishFinance1Retrieval.json +30 -0
  53. mteb/descriptive_stats/Retrieval/EnglishFinance2Retrieval.json +30 -0
  54. mteb/descriptive_stats/Retrieval/EnglishFinance3Retrieval.json +30 -0
  55. mteb/descriptive_stats/Retrieval/EnglishFinance4Retrieval.json +30 -0
  56. mteb/descriptive_stats/Retrieval/EnglishHealthcare1Retrieval.json +30 -0
  57. mteb/descriptive_stats/Retrieval/French1Retrieval.json +30 -0
  58. mteb/descriptive_stats/Retrieval/FrenchLegal1Retrieval.json +30 -0
  59. mteb/descriptive_stats/Retrieval/German1Retrieval.json +30 -0
  60. mteb/descriptive_stats/Retrieval/GermanHealthcare1Retrieval.json +30 -0
  61. mteb/descriptive_stats/Retrieval/GermanLegal1Retrieval.json +30 -0
  62. mteb/descriptive_stats/Retrieval/JapaneseCode1Retrieval.json +30 -0
  63. mteb/descriptive_stats/Retrieval/JapaneseLegal1Retrieval.json +30 -0
  64. mteb/descriptive_stats/Retrieval/LegalQANLRetrieval.json +30 -0
  65. mteb/descriptive_stats/Retrieval/MIRACLRetrieval.json +475 -494
  66. mteb/descriptive_stats/Retrieval/MSMARCO-Fa.json +25 -26
  67. mteb/descriptive_stats/Retrieval/MSMARCO.json +25 -84
  68. mteb/descriptive_stats/Retrieval/OpenTenderRetrieval.json +30 -0
  69. mteb/descriptive_stats/Retrieval/Touche2020.json +25 -26
  70. mteb/descriptive_stats/Retrieval/VABBRetrieval.json +30 -0
  71. mteb/descriptive_stats/Retrieval/bBSARDNLRetrieval.json +30 -0
  72. mteb/descriptive_stats/STS/SICK-NL-STS.json +28 -0
  73. mteb/descriptive_stats/Summarization/SummEval.json +27 -50
  74. mteb/descriptive_stats/Summarization/SummEvalFr.json +27 -50
  75. mteb/models/model_implementations/kalm_models.py +29 -0
  76. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  77. mteb/tasks/classification/eng/financial_phrasebank_classification.py +0 -3
  78. mteb/tasks/classification/kor/kor_hate_classification.py +0 -12
  79. mteb/tasks/classification/nld/__init__.py +16 -0
  80. mteb/tasks/classification/nld/dutch_cola_classification.py +38 -0
  81. mteb/tasks/classification/nld/dutch_government_bias_classification.py +37 -0
  82. mteb/tasks/classification/nld/dutch_news_articles_classification.py +30 -0
  83. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +36 -0
  84. mteb/tasks/classification/nld/iconclass_classification.py +41 -0
  85. mteb/tasks/classification/nld/open_tender_classification.py +38 -0
  86. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +46 -0
  87. mteb/tasks/clustering/__init__.py +1 -0
  88. mteb/tasks/clustering/nld/__init__.py +17 -0
  89. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +37 -0
  90. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +37 -0
  91. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +47 -0
  92. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +51 -0
  93. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +41 -0
  94. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +51 -0
  95. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +51 -0
  96. mteb/tasks/clustering/swe/swedn_clustering.py +2 -2
  97. mteb/tasks/multilabel_classification/__init__.py +1 -0
  98. mteb/tasks/multilabel_classification/nld/__init__.py +9 -0
  99. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +88 -0
  100. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +44 -0
  101. mteb/tasks/pair_classification/__init__.py +1 -0
  102. mteb/tasks/pair_classification/nld/__init__.py +7 -0
  103. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +36 -0
  104. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +41 -0
  105. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  106. mteb/tasks/retrieval/nld/__init__.py +10 -0
  107. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +41 -0
  108. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +30 -0
  109. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +39 -0
  110. mteb/tasks/retrieval/nld/open_tender_retrieval.py +38 -0
  111. mteb/tasks/retrieval/nld/vabb_retrieval.py +41 -0
  112. mteb/tasks/sts/__init__.py +1 -0
  113. mteb/tasks/sts/nld/__init__.py +5 -0
  114. mteb/tasks/sts/nld/sick_nl_sts.py +41 -0
  115. {mteb-2.0.4.dist-info → mteb-2.1.0.dist-info}/METADATA +2 -204
  116. {mteb-2.0.4.dist-info → mteb-2.1.0.dist-info}/RECORD +120 -49
  117. {mteb-2.0.4.dist-info → mteb-2.1.0.dist-info}/WHEEL +0 -0
  118. {mteb-2.0.4.dist-info → mteb-2.1.0.dist-info}/entry_points.txt +0 -0
  119. {mteb-2.0.4.dist-info → mteb-2.1.0.dist-info}/licenses/LICENSE +0 -0
  120. {mteb-2.0.4.dist-info → mteb-2.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,111 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 4485,
4
+ "text_statistics": {
5
+ "total_text_length": 2177123,
6
+ "min_text_length": 18,
7
+ "average_text_length": 485.4231884057971,
8
+ "max_text_length": 12193,
9
+ "unique_texts": 4485
10
+ },
11
+ "image_statistics": null,
12
+ "labels_statistics": {
13
+ "min_labels_per_text": 1,
14
+ "average_label_per_text": 1.0,
15
+ "max_labels_per_text": 1,
16
+ "unique_labels": 30,
17
+ "labels": {
18
+ "Kantoormachines en gegevensverwerkende apparatuur, kantooruitrusting en -benodigdheden, uitgez. meubilair en softwarepakketten": {
19
+ "count": 150
20
+ },
21
+ "Medische apparatuur, farmaceutische artikelen en artikelen voor lichaamsverzorging": {
22
+ "count": 150
23
+ },
24
+ "Onderzoek en ontwikkeling, en aanverwante adviezen": {
25
+ "count": 147
26
+ },
27
+ "Zakelijke dienstverlening: juridisch, marketing, consulting, drukkerij en beveiliging": {
28
+ "count": 150
29
+ },
30
+ "Uitrusting voor veiligheid, brandweer, politie en leger": {
31
+ "count": 150
32
+ },
33
+ "Structuren en materialen voor de bouw; ondersteunende producten voor de bouw (uitgezonderd elektrische apparatuur)": {
34
+ "count": 149
35
+ },
36
+ "Diensten voor land-, bos- en tuinbouw, aquicultuur en imkerij": {
37
+ "count": 150
38
+ },
39
+ "Reparatie- en onderhoudsdiensten": {
40
+ "count": 150
41
+ },
42
+ "Overige gemeenschaps-, sociale en persoonlijke diensten": {
43
+ "count": 150
44
+ },
45
+ "IT-diensten: adviezen, softwareontwikkeling, internet en ondersteuning": {
46
+ "count": 150
47
+ },
48
+ "Kleding, schoeisel, bagageartikelen en accessoires": {
49
+ "count": 150
50
+ },
51
+ "Meubelen (m.i.v. kantoormeubelen), inrichtingsartikelen, huishoudelijke apparaten (uitgez. verlichting) en schoonmaakproducten": {
52
+ "count": 149
53
+ },
54
+ "Gezondheidszorg en maatschappelijk werk": {
55
+ "count": 150
56
+ },
57
+ "Laboratoriuminstrumenten, optische en precisie-instrumenten (uitgezonderd brillen)": {
58
+ "count": 149
59
+ },
60
+ "Voeding, dranken, tabak en aanverwante producten": {
61
+ "count": 150
62
+ },
63
+ "Bouwwerkzaamheden": {
64
+ "count": 150
65
+ },
66
+ "Bedrijfsmachines": {
67
+ "count": 149
68
+ },
69
+ "Elektrische machines, apparaten, uitrusting en verbruiksartikelen; verlichting": {
70
+ "count": 149
71
+ },
72
+ "Vervoersdiensten (uitg. vervoer van afval)": {
73
+ "count": 149
74
+ },
75
+ "Financi\u00eble en verzekeringsdiensten": {
76
+ "count": 150
77
+ },
78
+ "Radio-, televisie-, communicatie-, telecommunicatietoestellen en aanverwante apparatuur": {
79
+ "count": 150
80
+ },
81
+ "Diensten voor onderwijs en opleiding": {
82
+ "count": 149
83
+ },
84
+ "Drukwerk en aanverwante producten": {
85
+ "count": 149
86
+ },
87
+ "Vervoersmaterieel en bijbehorende producten": {
88
+ "count": 149
89
+ },
90
+ "Software en informatiesystemen": {
91
+ "count": 150
92
+ },
93
+ "Dienstverlening op het gebied van architectuur, bouwkunde, civiele techniek en inspectie": {
94
+ "count": 150
95
+ },
96
+ "Diensten inzake afvalwater, afval, reiniging en milieu": {
97
+ "count": 150
98
+ },
99
+ "Post- en telecommunicatiediensten": {
100
+ "count": 149
101
+ },
102
+ "Aardolieproducten, brandstof, elektriciteit en andere energiebronnen": {
103
+ "count": 149
104
+ },
105
+ "Diensten voor hotel, restaurant en detailhandel": {
106
+ "count": 149
107
+ }
108
+ }
109
+ }
110
+ }
111
+ }
@@ -0,0 +1,111 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 4381,
4
+ "text_statistics": {
5
+ "total_text_length": 264422,
6
+ "min_text_length": 6,
7
+ "average_text_length": 60.3565396028304,
8
+ "max_text_length": 420,
9
+ "unique_texts": 4381
10
+ },
11
+ "image_statistics": null,
12
+ "labels_statistics": {
13
+ "min_labels_per_text": 1,
14
+ "average_label_per_text": 1.0,
15
+ "max_labels_per_text": 1,
16
+ "unique_labels": 30,
17
+ "labels": {
18
+ "Kantoormachines en gegevensverwerkende apparatuur, kantooruitrusting en -benodigdheden, uitgez. meubilair en softwarepakketten": {
19
+ "count": 143
20
+ },
21
+ "Medische apparatuur, farmaceutische artikelen en artikelen voor lichaamsverzorging": {
22
+ "count": 148
23
+ },
24
+ "Onderzoek en ontwikkeling, en aanverwante adviezen": {
25
+ "count": 147
26
+ },
27
+ "Zakelijke dienstverlening: juridisch, marketing, consulting, drukkerij en beveiliging": {
28
+ "count": 145
29
+ },
30
+ "Uitrusting voor veiligheid, brandweer, politie en leger": {
31
+ "count": 149
32
+ },
33
+ "Structuren en materialen voor de bouw; ondersteunende producten voor de bouw (uitgezonderd elektrische apparatuur)": {
34
+ "count": 146
35
+ },
36
+ "Diensten voor land-, bos- en tuinbouw, aquicultuur en imkerij": {
37
+ "count": 150
38
+ },
39
+ "Reparatie- en onderhoudsdiensten": {
40
+ "count": 148
41
+ },
42
+ "Overige gemeenschaps-, sociale en persoonlijke diensten": {
43
+ "count": 143
44
+ },
45
+ "IT-diensten: adviezen, softwareontwikkeling, internet en ondersteuning": {
46
+ "count": 149
47
+ },
48
+ "Kleding, schoeisel, bagageartikelen en accessoires": {
49
+ "count": 143
50
+ },
51
+ "Meubelen (m.i.v. kantoormeubelen), inrichtingsartikelen, huishoudelijke apparaten (uitgez. verlichting) en schoonmaakproducten": {
52
+ "count": 143
53
+ },
54
+ "Gezondheidszorg en maatschappelijk werk": {
55
+ "count": 147
56
+ },
57
+ "Laboratoriuminstrumenten, optische en precisie-instrumenten (uitgezonderd brillen)": {
58
+ "count": 147
59
+ },
60
+ "Voeding, dranken, tabak en aanverwante producten": {
61
+ "count": 149
62
+ },
63
+ "Bouwwerkzaamheden": {
64
+ "count": 149
65
+ },
66
+ "Bedrijfsmachines": {
67
+ "count": 149
68
+ },
69
+ "Elektrische machines, apparaten, uitrusting en verbruiksartikelen; verlichting": {
70
+ "count": 149
71
+ },
72
+ "Vervoersdiensten (uitg. vervoer van afval)": {
73
+ "count": 144
74
+ },
75
+ "Financi\u00eble en verzekeringsdiensten": {
76
+ "count": 144
77
+ },
78
+ "Radio-, televisie-, communicatie-, telecommunicatietoestellen en aanverwante apparatuur": {
79
+ "count": 145
80
+ },
81
+ "Diensten voor onderwijs en opleiding": {
82
+ "count": 148
83
+ },
84
+ "Drukwerk en aanverwante producten": {
85
+ "count": 137
86
+ },
87
+ "Vervoersmaterieel en bijbehorende producten": {
88
+ "count": 148
89
+ },
90
+ "Software en informatiesystemen": {
91
+ "count": 150
92
+ },
93
+ "Dienstverlening op het gebied van architectuur, bouwkunde, civiele techniek en inspectie": {
94
+ "count": 149
95
+ },
96
+ "Diensten inzake afvalwater, afval, reiniging en milieu": {
97
+ "count": 147
98
+ },
99
+ "Post- en telecommunicatiediensten": {
100
+ "count": 142
101
+ },
102
+ "Aardolieproducten, brandstof, elektriciteit en andere energiebronnen": {
103
+ "count": 143
104
+ },
105
+ "Diensten voor hotel, restaurant en detailhandel": {
106
+ "count": 140
107
+ }
108
+ }
109
+ }
110
+ }
111
+ }