mteb 2.0.5__py3-none-any.whl → 2.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. mteb/__init__.py +10 -1
  2. mteb/_create_dataloaders.py +2 -0
  3. mteb/abstasks/_stratification.py +1 -1
  4. mteb/abstasks/abstask.py +6 -1
  5. mteb/abstasks/dataset_card_template.md +1 -1
  6. mteb/abstasks/retrieval.py +2 -1
  7. mteb/abstasks/retrieval_dataset_loaders.py +1 -1
  8. mteb/abstasks/task_metadata.py +1 -1
  9. mteb/benchmarks/benchmarks/__init__.py +2 -0
  10. mteb/benchmarks/benchmarks/benchmarks.py +82 -11
  11. mteb/benchmarks/get_benchmark.py +1 -1
  12. mteb/descriptive_stats/Classification/DutchColaClassification.json +54 -0
  13. mteb/descriptive_stats/Classification/DutchGovernmentBiasClassification.json +54 -0
  14. mteb/descriptive_stats/Classification/DutchNewsArticlesClassification.json +90 -0
  15. mteb/descriptive_stats/Classification/DutchSarcasticHeadlinesClassification.json +54 -0
  16. mteb/descriptive_stats/Classification/IconclassClassification.json +96 -0
  17. mteb/descriptive_stats/Classification/OpenTenderClassification.json +222 -0
  18. mteb/descriptive_stats/Classification/VaccinChatNLClassification.json +1068 -0
  19. mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringP2P.json +45 -0
  20. mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringS2S.json +45 -0
  21. mteb/descriptive_stats/Clustering/IconclassClusteringS2S.json +48 -0
  22. mteb/descriptive_stats/Clustering/OpenTenderClusteringP2P.json +111 -0
  23. mteb/descriptive_stats/Clustering/OpenTenderClusteringS2S.json +111 -0
  24. mteb/descriptive_stats/Clustering/VABBClusteringP2P.json +60 -0
  25. mteb/descriptive_stats/Clustering/VABBClusteringS2S.json +60 -0
  26. mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XFlickr30kCoT2IRetrieval.json +243 -153
  27. mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XM3600T2IRetrieval.json +999 -629
  28. mteb/descriptive_stats/Image/Any2AnyRetrieval/OVENIT2TRetrieval.json +33 -17
  29. mteb/descriptive_stats/Image/DocumentUnderstanding/MIRACLVisionRetrieval.json +574 -0
  30. mteb/descriptive_stats/MultilabelClassification/CovidDisinformationNLMultiLabelClassification.json +84 -0
  31. mteb/descriptive_stats/MultilabelClassification/VABBMultiLabelClassification.json +156 -0
  32. mteb/descriptive_stats/PairClassification/SICKNLPairClassification.json +35 -0
  33. mteb/descriptive_stats/PairClassification/XLWICNLPairClassification.json +35 -0
  34. mteb/descriptive_stats/Retrieval/ClimateFEVERHardNegatives.v2.json +30 -0
  35. mteb/descriptive_stats/Retrieval/DBPediaHardNegatives.v2.json +30 -0
  36. mteb/descriptive_stats/Retrieval/DutchNewsArticlesRetrieval.json +30 -0
  37. mteb/descriptive_stats/Retrieval/FEVERHardNegatives.v2.json +30 -0
  38. mteb/descriptive_stats/Retrieval/HotpotQAHardNegatives.v2.json +30 -0
  39. mteb/descriptive_stats/Retrieval/LegalQANLRetrieval.json +30 -0
  40. mteb/descriptive_stats/Retrieval/OpenTenderRetrieval.json +30 -0
  41. mteb/descriptive_stats/Retrieval/QuoraRetrievalHardNegatives.v2.json +30 -0
  42. mteb/descriptive_stats/Retrieval/RiaNewsRetrievalHardNegatives.v2.json +30 -0
  43. mteb/descriptive_stats/Retrieval/VABBRetrieval.json +30 -0
  44. mteb/descriptive_stats/Retrieval/VDRMultilingualRetrieval.json +184 -0
  45. mteb/descriptive_stats/Retrieval/bBSARDNLRetrieval.json +30 -0
  46. mteb/descriptive_stats/STS/SICK-NL-STS.json +28 -0
  47. mteb/languages/check_language_code.py +11 -3
  48. mteb/languages/language_scripts.py +4 -0
  49. mteb/leaderboard/text_segments.py +1 -1
  50. mteb/models/model_implementations/b1ade_models.py +1 -1
  51. mteb/models/model_implementations/bge_models.py +1 -3
  52. mteb/models/model_implementations/bmretriever_models.py +1 -1
  53. mteb/models/model_implementations/gme_v_models.py +2 -2
  54. mteb/models/model_implementations/ibm_granite_models.py +1 -1
  55. mteb/models/model_implementations/inf_models.py +3 -3
  56. mteb/models/model_implementations/jina_models.py +12 -2
  57. mteb/models/model_implementations/llm2vec_models.py +1 -1
  58. mteb/models/model_implementations/misc_models.py +2 -2
  59. mteb/models/model_implementations/mxbai_models.py +1 -1
  60. mteb/models/model_implementations/salesforce_models.py +1 -1
  61. mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -1
  62. mteb/models/model_implementations/voyage_v.py +9 -9
  63. mteb/results/task_result.py +6 -8
  64. mteb/tasks/classification/dan/angry_tweets_classification.py +2 -2
  65. mteb/tasks/classification/eng/legal_bench_classification.py +3 -3
  66. mteb/tasks/classification/mya/myanmar_news.py +2 -2
  67. mteb/tasks/classification/nld/__init__.py +16 -0
  68. mteb/tasks/classification/nld/dutch_cola_classification.py +38 -0
  69. mteb/tasks/classification/nld/dutch_government_bias_classification.py +37 -0
  70. mteb/tasks/classification/nld/dutch_news_articles_classification.py +30 -0
  71. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +36 -0
  72. mteb/tasks/classification/nld/iconclass_classification.py +41 -0
  73. mteb/tasks/classification/nld/open_tender_classification.py +38 -0
  74. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +46 -0
  75. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  76. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  77. mteb/tasks/clustering/__init__.py +1 -0
  78. mteb/tasks/clustering/nld/__init__.py +17 -0
  79. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +37 -0
  80. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +37 -0
  81. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +47 -0
  82. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +51 -0
  83. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +41 -0
  84. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +51 -0
  85. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +51 -0
  86. mteb/tasks/multilabel_classification/__init__.py +1 -0
  87. mteb/tasks/multilabel_classification/nld/__init__.py +9 -0
  88. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +88 -0
  89. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +44 -0
  90. mteb/tasks/pair_classification/__init__.py +1 -0
  91. mteb/tasks/pair_classification/multilingual/indic_xnli_pair_classification.py +9 -8
  92. mteb/tasks/pair_classification/nld/__init__.py +7 -0
  93. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +36 -0
  94. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +41 -0
  95. mteb/tasks/retrieval/code/code_rag.py +8 -8
  96. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  97. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  98. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  99. mteb/tasks/retrieval/eng/__init__.py +18 -4
  100. mteb/tasks/retrieval/eng/climate_fever_retrieval.py +68 -77
  101. mteb/tasks/retrieval/eng/dbpedia_retrieval.py +55 -50
  102. mteb/tasks/retrieval/eng/fever_retrieval.py +62 -67
  103. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +0 -4
  104. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +0 -4
  105. mteb/tasks/retrieval/eng/hotpot_qa_retrieval.py +57 -67
  106. mteb/tasks/retrieval/eng/legal_summarization_retrieval.py +1 -1
  107. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +0 -3
  108. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +0 -2
  109. mteb/tasks/retrieval/eng/oven_it2t_retrieval.py +1 -1
  110. mteb/tasks/retrieval/eng/quora_retrieval.py +51 -46
  111. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +0 -4
  112. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +0 -4
  113. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +0 -2
  114. mteb/tasks/retrieval/jpn/ja_gov_faqs_retrieval.py +1 -1
  115. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +1 -1
  116. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +0 -2
  117. mteb/tasks/retrieval/multilingual/miracl_retrieval.py +1 -1
  118. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +2 -9
  119. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +0 -2
  120. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +0 -2
  121. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +6 -5
  122. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +3 -4
  123. mteb/tasks/retrieval/nld/__init__.py +10 -0
  124. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +41 -0
  125. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +30 -0
  126. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +39 -0
  127. mteb/tasks/retrieval/nld/open_tender_retrieval.py +38 -0
  128. mteb/tasks/retrieval/nld/vabb_retrieval.py +41 -0
  129. mteb/tasks/retrieval/nob/norquad.py +2 -2
  130. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  131. mteb/tasks/retrieval/rus/__init__.py +11 -2
  132. mteb/tasks/retrieval/rus/ria_news_retrieval.py +48 -44
  133. mteb/tasks/retrieval/tur/tur_hist_quad.py +2 -2
  134. mteb/tasks/sts/__init__.py +1 -0
  135. mteb/tasks/sts/nld/__init__.py +5 -0
  136. mteb/tasks/sts/nld/sick_nl_sts.py +41 -0
  137. mteb-2.1.1.dist-info/METADATA +253 -0
  138. {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/RECORD +142 -95
  139. mteb/descriptive_stats/Classification/PersianTextTone.json +0 -56
  140. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchCount.json +0 -37
  141. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDepth.json +0 -25
  142. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDistance.json +0 -25
  143. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchRelation.json +0 -25
  144. mteb/descriptive_stats/Image/VisualSTS/STS12VisualSTS.json +0 -20
  145. mteb/descriptive_stats/Image/VisualSTS/STS13VisualSTS.json +0 -20
  146. mteb/descriptive_stats/Image/VisualSTS/STS14VisualSTS.json +0 -20
  147. mteb/descriptive_stats/Image/VisualSTS/STS15VisualSTS.json +0 -20
  148. mteb/descriptive_stats/Image/VisualSTS/STS16VisualSTS.json +0 -20
  149. mteb/descriptive_stats/Image/VisualSTS/STS17MultilingualVisualSTS.json +0 -220
  150. mteb/descriptive_stats/Image/VisualSTS/STSBenchmarkMultilingualVisualSTS.json +0 -402
  151. mteb/descriptive_stats/Reranking/InstructIR.json +0 -31
  152. mteb-2.0.5.dist-info/METADATA +0 -455
  153. {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/WHEEL +0 -0
  154. {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/entry_points.txt +0 -0
  155. {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/licenses/LICENSE +0 -0
  156. {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,222 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 4485,
4
+ "number_texts_intersect_with_train": 79,
5
+ "text_statistics": {
6
+ "total_text_length": 2177123,
7
+ "min_text_length": 18,
8
+ "average_text_length": 485.4231884057971,
9
+ "max_text_length": 12193,
10
+ "unique_texts": 4485
11
+ },
12
+ "image_statistics": null,
13
+ "label_statistics": {
14
+ "min_labels_per_text": 1,
15
+ "average_label_per_text": 1.0,
16
+ "max_labels_per_text": 1,
17
+ "unique_labels": 30,
18
+ "labels": {
19
+ "Kantoormachines en gegevensverwerkende apparatuur, kantooruitrusting en -benodigdheden, uitgez. meubilair en softwarepakketten": {
20
+ "count": 150
21
+ },
22
+ "Medische apparatuur, farmaceutische artikelen en artikelen voor lichaamsverzorging": {
23
+ "count": 150
24
+ },
25
+ "Onderzoek en ontwikkeling, en aanverwante adviezen": {
26
+ "count": 147
27
+ },
28
+ "Zakelijke dienstverlening: juridisch, marketing, consulting, drukkerij en beveiliging": {
29
+ "count": 150
30
+ },
31
+ "Uitrusting voor veiligheid, brandweer, politie en leger": {
32
+ "count": 150
33
+ },
34
+ "Structuren en materialen voor de bouw; ondersteunende producten voor de bouw (uitgezonderd elektrische apparatuur)": {
35
+ "count": 149
36
+ },
37
+ "Diensten voor land-, bos- en tuinbouw, aquicultuur en imkerij": {
38
+ "count": 150
39
+ },
40
+ "Reparatie- en onderhoudsdiensten": {
41
+ "count": 150
42
+ },
43
+ "Overige gemeenschaps-, sociale en persoonlijke diensten": {
44
+ "count": 150
45
+ },
46
+ "IT-diensten: adviezen, softwareontwikkeling, internet en ondersteuning": {
47
+ "count": 150
48
+ },
49
+ "Kleding, schoeisel, bagageartikelen en accessoires": {
50
+ "count": 150
51
+ },
52
+ "Meubelen (m.i.v. kantoormeubelen), inrichtingsartikelen, huishoudelijke apparaten (uitgez. verlichting) en schoonmaakproducten": {
53
+ "count": 149
54
+ },
55
+ "Gezondheidszorg en maatschappelijk werk": {
56
+ "count": 150
57
+ },
58
+ "Laboratoriuminstrumenten, optische en precisie-instrumenten (uitgezonderd brillen)": {
59
+ "count": 149
60
+ },
61
+ "Voeding, dranken, tabak en aanverwante producten": {
62
+ "count": 150
63
+ },
64
+ "Bouwwerkzaamheden": {
65
+ "count": 150
66
+ },
67
+ "Bedrijfsmachines": {
68
+ "count": 149
69
+ },
70
+ "Elektrische machines, apparaten, uitrusting en verbruiksartikelen; verlichting": {
71
+ "count": 149
72
+ },
73
+ "Vervoersdiensten (uitg. vervoer van afval)": {
74
+ "count": 149
75
+ },
76
+ "Financi\u00eble en verzekeringsdiensten": {
77
+ "count": 150
78
+ },
79
+ "Radio-, televisie-, communicatie-, telecommunicatietoestellen en aanverwante apparatuur": {
80
+ "count": 150
81
+ },
82
+ "Diensten voor onderwijs en opleiding": {
83
+ "count": 149
84
+ },
85
+ "Drukwerk en aanverwante producten": {
86
+ "count": 149
87
+ },
88
+ "Vervoersmaterieel en bijbehorende producten": {
89
+ "count": 149
90
+ },
91
+ "Software en informatiesystemen": {
92
+ "count": 150
93
+ },
94
+ "Dienstverlening op het gebied van architectuur, bouwkunde, civiele techniek en inspectie": {
95
+ "count": 150
96
+ },
97
+ "Diensten inzake afvalwater, afval, reiniging en milieu": {
98
+ "count": 150
99
+ },
100
+ "Post- en telecommunicatiediensten": {
101
+ "count": 149
102
+ },
103
+ "Aardolieproducten, brandstof, elektriciteit en andere energiebronnen": {
104
+ "count": 149
105
+ },
106
+ "Diensten voor hotel, restaurant en detailhandel": {
107
+ "count": 149
108
+ }
109
+ }
110
+ }
111
+ },
112
+ "train": {
113
+ "num_samples": 20777,
114
+ "number_texts_intersect_with_train": null,
115
+ "text_statistics": {
116
+ "total_text_length": 10089834,
117
+ "min_text_length": 13,
118
+ "average_text_length": 485.6251624392357,
119
+ "max_text_length": 13655,
120
+ "unique_texts": 20777
121
+ },
122
+ "image_statistics": null,
123
+ "label_statistics": {
124
+ "min_labels_per_text": 1,
125
+ "average_label_per_text": 1.0,
126
+ "max_labels_per_text": 1,
127
+ "unique_labels": 30,
128
+ "labels": {
129
+ "Radio-, televisie-, communicatie-, telecommunicatietoestellen en aanverwante apparatuur": {
130
+ "count": 693
131
+ },
132
+ "Diensten voor hotel, restaurant en detailhandel": {
133
+ "count": 684
134
+ },
135
+ "Kleding, schoeisel, bagageartikelen en accessoires": {
136
+ "count": 685
137
+ },
138
+ "Kantoormachines en gegevensverwerkende apparatuur, kantooruitrusting en -benodigdheden, uitgez. meubilair en softwarepakketten": {
139
+ "count": 694
140
+ },
141
+ "Post- en telecommunicatiediensten": {
142
+ "count": 688
143
+ },
144
+ "Bouwwerkzaamheden": {
145
+ "count": 700
146
+ },
147
+ "Aardolieproducten, brandstof, elektriciteit en andere energiebronnen": {
148
+ "count": 690
149
+ },
150
+ "Voeding, dranken, tabak en aanverwante producten": {
151
+ "count": 680
152
+ },
153
+ "Bedrijfsmachines": {
154
+ "count": 693
155
+ },
156
+ "Structuren en materialen voor de bouw; ondersteunende producten voor de bouw (uitgezonderd elektrische apparatuur)": {
157
+ "count": 690
158
+ },
159
+ "Elektrische machines, apparaten, uitrusting en verbruiksartikelen; verlichting": {
160
+ "count": 684
161
+ },
162
+ "Meubelen (m.i.v. kantoormeubelen), inrichtingsartikelen, huishoudelijke apparaten (uitgez. verlichting) en schoonmaakproducten": {
163
+ "count": 696
164
+ },
165
+ "Drukwerk en aanverwante producten": {
166
+ "count": 687
167
+ },
168
+ "Dienstverlening op het gebied van architectuur, bouwkunde, civiele techniek en inspectie": {
169
+ "count": 699
170
+ },
171
+ "Financi\u00eble en verzekeringsdiensten": {
172
+ "count": 692
173
+ },
174
+ "Vervoersmaterieel en bijbehorende producten": {
175
+ "count": 692
176
+ },
177
+ "Diensten voor onderwijs en opleiding": {
178
+ "count": 698
179
+ },
180
+ "Gezondheidszorg en maatschappelijk werk": {
181
+ "count": 696
182
+ },
183
+ "Zakelijke dienstverlening: juridisch, marketing, consulting, drukkerij en beveiliging": {
184
+ "count": 698
185
+ },
186
+ "Medische apparatuur, farmaceutische artikelen en artikelen voor lichaamsverzorging": {
187
+ "count": 692
188
+ },
189
+ "IT-diensten: adviezen, softwareontwikkeling, internet en ondersteuning": {
190
+ "count": 699
191
+ },
192
+ "Onderzoek en ontwikkeling, en aanverwante adviezen": {
193
+ "count": 688
194
+ },
195
+ "Overige gemeenschaps-, sociale en persoonlijke diensten": {
196
+ "count": 695
197
+ },
198
+ "Diensten inzake afvalwater, afval, reiniging en milieu": {
199
+ "count": 699
200
+ },
201
+ "Diensten voor land-, bos- en tuinbouw, aquicultuur en imkerij": {
202
+ "count": 695
203
+ },
204
+ "Laboratoriuminstrumenten, optische en precisie-instrumenten (uitgezonderd brillen)": {
205
+ "count": 693
206
+ },
207
+ "Reparatie- en onderhoudsdiensten": {
208
+ "count": 698
209
+ },
210
+ "Software en informatiesystemen": {
211
+ "count": 697
212
+ },
213
+ "Vervoersdiensten (uitg. vervoer van afval)": {
214
+ "count": 691
215
+ },
216
+ "Uitrusting voor veiligheid, brandweer, politie en leger": {
217
+ "count": 691
218
+ }
219
+ }
220
+ }
221
+ }
222
+ }