mteb 2.0.4__py3-none-any.whl → 2.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. mteb/descriptive_stats/BitextMining/BUCC.json +70 -40
  2. mteb/descriptive_stats/Classification/DKHateClassification.json +40 -24
  3. mteb/descriptive_stats/Classification/FinancialPhrasebankClassification.json +23 -15
  4. mteb/descriptive_stats/Classification/ImdbClassification.json +40 -24
  5. mteb/descriptive_stats/Classification/KorHateClassification.json +23 -15
  6. mteb/descriptive_stats/Clustering/ArxivClusteringP2P.json +555 -550
  7. mteb/descriptive_stats/Clustering/ArxivClusteringP2P.v2.json +546 -541
  8. mteb/descriptive_stats/Clustering/ArxivClusteringS2S.json +555 -550
  9. mteb/descriptive_stats/Clustering/MLSUMClusteringP2P.json +2466 -2416
  10. mteb/descriptive_stats/Clustering/RedditClusteringP2P.json +1365 -1360
  11. mteb/descriptive_stats/Clustering/SNLClustering.json +378 -373
  12. mteb/descriptive_stats/Clustering/SwednClustering.json +28 -23
  13. mteb/descriptive_stats/Clustering/VGClustering.json +54 -49
  14. mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/WITT2IRetrieval.json +324 -204
  15. mteb/descriptive_stats/Image/Any2AnyRetrieval/MemotionI2TRetrieval.json +28 -18
  16. mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRAirbnbSyntheticRetrieval.json +334 -0
  17. mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRGitHubReadmeRetrieval.json +544 -0
  18. mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRTweetStockSyntheticsRetrieval.json +334 -0
  19. mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRWikimediaCommonsDocumentsRetrieval.json +634 -0
  20. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore2ESGReportsRetrieval.json +154 -0
  21. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore2EconomicsReportsRetrieval.json +154 -0
  22. mteb/descriptive_stats/Image/ImageClassification/Imagenet1k.json +6039 -3007
  23. mteb/descriptive_stats/Image/ZeroShotClassification/Imagenet1kZeroShot.json +3024 -3010
  24. mteb/descriptive_stats/Image/ZeroShotClassification/PatchCamelyonZeroShot.json +30 -16
  25. mteb/descriptive_stats/Reranking/MIRACLReranking.json +555 -479
  26. mteb/descriptive_stats/Reranking/MindSmallReranking.json +29 -25
  27. mteb/descriptive_stats/Retrieval/AlloprofRetrieval.json +25 -26
  28. mteb/descriptive_stats/Retrieval/Code1Retrieval.json +30 -0
  29. mteb/descriptive_stats/Retrieval/DanFEVER.json +25 -26
  30. mteb/descriptive_stats/Retrieval/EnglishFinance1Retrieval.json +30 -0
  31. mteb/descriptive_stats/Retrieval/EnglishFinance2Retrieval.json +30 -0
  32. mteb/descriptive_stats/Retrieval/EnglishFinance3Retrieval.json +30 -0
  33. mteb/descriptive_stats/Retrieval/EnglishFinance4Retrieval.json +30 -0
  34. mteb/descriptive_stats/Retrieval/EnglishHealthcare1Retrieval.json +30 -0
  35. mteb/descriptive_stats/Retrieval/French1Retrieval.json +30 -0
  36. mteb/descriptive_stats/Retrieval/FrenchLegal1Retrieval.json +30 -0
  37. mteb/descriptive_stats/Retrieval/German1Retrieval.json +30 -0
  38. mteb/descriptive_stats/Retrieval/GermanHealthcare1Retrieval.json +30 -0
  39. mteb/descriptive_stats/Retrieval/GermanLegal1Retrieval.json +30 -0
  40. mteb/descriptive_stats/Retrieval/JapaneseCode1Retrieval.json +30 -0
  41. mteb/descriptive_stats/Retrieval/JapaneseLegal1Retrieval.json +30 -0
  42. mteb/descriptive_stats/Retrieval/MIRACLRetrieval.json +475 -494
  43. mteb/descriptive_stats/Retrieval/MSMARCO-Fa.json +25 -26
  44. mteb/descriptive_stats/Retrieval/MSMARCO.json +25 -84
  45. mteb/descriptive_stats/Retrieval/Touche2020.json +25 -26
  46. mteb/descriptive_stats/Summarization/SummEval.json +27 -50
  47. mteb/descriptive_stats/Summarization/SummEvalFr.json +27 -50
  48. mteb/models/model_implementations/kalm_models.py +29 -0
  49. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  50. mteb/tasks/classification/eng/financial_phrasebank_classification.py +0 -3
  51. mteb/tasks/classification/kor/kor_hate_classification.py +0 -12
  52. mteb/tasks/clustering/swe/swedn_clustering.py +2 -2
  53. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  54. {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/METADATA +1 -1
  55. {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/RECORD +59 -40
  56. {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/WHEEL +0 -0
  57. {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/entry_points.txt +0 -0
  58. {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/licenses/LICENSE +0 -0
  59. {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/top_level.txt +0 -0
@@ -1,243 +1,363 @@
1
1
  {
2
2
  "test": {
3
- "number_of_characters": 506601,
4
3
  "num_samples": 18137,
5
- "num_queries": 9584,
6
- "num_documents": 8553,
7
- "min_document_length": 0,
8
- "average_document_length": 0,
9
- "max_document_length": 0,
10
- "unique_documents": 0,
11
- "num_document_images": 8553,
12
- "min_query_length": 9,
13
- "average_query_length": 52.85903589315526,
14
- "max_query_length": 779,
15
- "unique_queries": 9076,
16
- "num_query_images": 0,
17
- "min_relevant_docs_per_query": 1,
18
- "average_relevant_docs_per_query": 1.0,
19
- "max_relevant_docs_per_query": 1,
20
- "unique_relevant_docs": 8553,
4
+ "number_of_characters": 429929,
5
+ "documents_text_statistics": null,
6
+ "documents_image_statistics": {
7
+ "min_image_width": 298,
8
+ "average_image_width": 299.9867882614287,
9
+ "max_image_width": 300,
10
+ "min_image_height": 25,
11
+ "average_image_height": 280.2253010639542,
12
+ "max_image_height": 2566,
13
+ "unique_images": 6191
14
+ },
15
+ "queries_text_statistics": {
16
+ "total_text_length": 429929,
17
+ "min_text_length": 1,
18
+ "average_text_length": 44.85903589315526,
19
+ "max_text_length": 771,
20
+ "unique_texts": 8970
21
+ },
22
+ "queries_image_statistics": null,
23
+ "relevant_docs_statistics": {
24
+ "num_relevant_docs": 9584,
25
+ "min_relevant_docs_per_query": 1,
26
+ "average_relevant_docs_per_query": 1.0,
27
+ "max_relevant_docs_per_query": 1,
28
+ "unique_relevant_docs": 8553
29
+ },
30
+ "top_ranked_statistics": null,
21
31
  "hf_subset_descriptive_stats": {
22
32
  "ar": {
23
- "number_of_characters": 46144,
24
33
  "num_samples": 1682,
25
- "num_queries": 890,
26
- "num_documents": 792,
27
- "min_document_length": 0,
28
- "average_document_length": 0,
29
- "max_document_length": 0,
30
- "unique_documents": 0,
31
- "num_document_images": 792,
32
- "min_query_length": 4,
33
- "average_query_length": 51.84719101123596,
34
- "max_query_length": 533,
35
- "unique_queries": 871,
36
- "num_query_images": 0,
37
- "min_relevant_docs_per_query": 1,
38
- "average_relevant_docs_per_query": 1.0,
39
- "max_relevant_docs_per_query": 1,
40
- "unique_relevant_docs": 792
34
+ "number_of_characters": 46144,
35
+ "documents_text_statistics": null,
36
+ "documents_image_statistics": {
37
+ "min_image_width": 298,
38
+ "average_image_width": 299.9848484848485,
39
+ "max_image_width": 300,
40
+ "min_image_height": 41,
41
+ "average_image_height": 265.90656565656565,
42
+ "max_image_height": 1274,
43
+ "unique_images": 792
44
+ },
45
+ "queries_text_statistics": {
46
+ "total_text_length": 46144,
47
+ "min_text_length": 4,
48
+ "average_text_length": 51.84719101123596,
49
+ "max_text_length": 533,
50
+ "unique_texts": 871
51
+ },
52
+ "queries_image_statistics": null,
53
+ "relevant_docs_statistics": {
54
+ "num_relevant_docs": 890,
55
+ "min_relevant_docs_per_query": 1,
56
+ "average_relevant_docs_per_query": 1.0,
57
+ "max_relevant_docs_per_query": 1,
58
+ "unique_relevant_docs": 792
59
+ },
60
+ "top_ranked_statistics": null
41
61
  },
42
62
  "bg": {
43
- "number_of_characters": 40682,
44
63
  "num_samples": 1666,
45
- "num_queries": 860,
46
- "num_documents": 806,
47
- "min_document_length": 0,
48
- "average_document_length": 0,
49
- "max_document_length": 0,
50
- "unique_documents": 0,
51
- "num_document_images": 806,
52
- "min_query_length": 4,
53
- "average_query_length": 47.3046511627907,
54
- "max_query_length": 771,
55
- "unique_queries": 830,
56
- "num_query_images": 0,
57
- "min_relevant_docs_per_query": 1,
58
- "average_relevant_docs_per_query": 1.0,
59
- "max_relevant_docs_per_query": 1,
60
- "unique_relevant_docs": 806
64
+ "number_of_characters": 40682,
65
+ "documents_text_statistics": null,
66
+ "documents_image_statistics": {
67
+ "min_image_width": 299,
68
+ "average_image_width": 299.9863523573201,
69
+ "max_image_width": 300,
70
+ "min_image_height": 52,
71
+ "average_image_height": 290.2258064516129,
72
+ "max_image_height": 803,
73
+ "unique_images": 806
74
+ },
75
+ "queries_text_statistics": {
76
+ "total_text_length": 40682,
77
+ "min_text_length": 4,
78
+ "average_text_length": 47.3046511627907,
79
+ "max_text_length": 771,
80
+ "unique_texts": 830
81
+ },
82
+ "queries_image_statistics": null,
83
+ "relevant_docs_statistics": {
84
+ "num_relevant_docs": 860,
85
+ "min_relevant_docs_per_query": 1,
86
+ "average_relevant_docs_per_query": 1.0,
87
+ "max_relevant_docs_per_query": 1,
88
+ "unique_relevant_docs": 806
89
+ },
90
+ "top_ranked_statistics": null
61
91
  },
62
92
  "da": {
63
- "number_of_characters": 48235,
64
93
  "num_samples": 1705,
65
- "num_queries": 891,
66
- "num_documents": 814,
67
- "min_document_length": 0,
68
- "average_document_length": 0,
69
- "max_document_length": 0,
70
- "unique_documents": 0,
71
- "num_document_images": 814,
72
- "min_query_length": 4,
73
- "average_query_length": 54.135802469135804,
74
- "max_query_length": 537,
75
- "unique_queries": 889,
76
- "num_query_images": 0,
77
- "min_relevant_docs_per_query": 1,
78
- "average_relevant_docs_per_query": 1.0,
79
- "max_relevant_docs_per_query": 1,
80
- "unique_relevant_docs": 814
94
+ "number_of_characters": 48235,
95
+ "documents_text_statistics": null,
96
+ "documents_image_statistics": {
97
+ "min_image_width": 299,
98
+ "average_image_width": 299.9889434889435,
99
+ "max_image_width": 300,
100
+ "min_image_height": 51,
101
+ "average_image_height": 277.76781326781327,
102
+ "max_image_height": 714,
103
+ "unique_images": 814
104
+ },
105
+ "queries_text_statistics": {
106
+ "total_text_length": 48235,
107
+ "min_text_length": 4,
108
+ "average_text_length": 54.135802469135804,
109
+ "max_text_length": 537,
110
+ "unique_texts": 889
111
+ },
112
+ "queries_image_statistics": null,
113
+ "relevant_docs_statistics": {
114
+ "num_relevant_docs": 891,
115
+ "min_relevant_docs_per_query": 1,
116
+ "average_relevant_docs_per_query": 1.0,
117
+ "max_relevant_docs_per_query": 1,
118
+ "unique_relevant_docs": 814
119
+ },
120
+ "top_ranked_statistics": null
81
121
  },
82
122
  "el": {
83
- "number_of_characters": 30842,
84
123
  "num_samples": 1111,
85
- "num_queries": 570,
86
- "num_documents": 541,
87
- "min_document_length": 0,
88
- "average_document_length": 0,
89
- "max_document_length": 0,
90
- "unique_documents": 0,
91
- "num_document_images": 541,
92
- "min_query_length": 1,
93
- "average_query_length": 54.10877192982456,
94
- "max_query_length": 404,
95
- "unique_queries": 565,
96
- "num_query_images": 0,
97
- "min_relevant_docs_per_query": 1,
98
- "average_relevant_docs_per_query": 1.0,
99
- "max_relevant_docs_per_query": 1,
100
- "unique_relevant_docs": 541
124
+ "number_of_characters": 30842,
125
+ "documents_text_statistics": null,
126
+ "documents_image_statistics": {
127
+ "min_image_width": 299,
128
+ "average_image_width": 299.9796672828096,
129
+ "max_image_width": 300,
130
+ "min_image_height": 38,
131
+ "average_image_height": 263.32902033271716,
132
+ "max_image_height": 570,
133
+ "unique_images": 541
134
+ },
135
+ "queries_text_statistics": {
136
+ "total_text_length": 30842,
137
+ "min_text_length": 1,
138
+ "average_text_length": 54.10877192982456,
139
+ "max_text_length": 404,
140
+ "unique_texts": 565
141
+ },
142
+ "queries_image_statistics": null,
143
+ "relevant_docs_statistics": {
144
+ "num_relevant_docs": 570,
145
+ "min_relevant_docs_per_query": 1,
146
+ "average_relevant_docs_per_query": 1.0,
147
+ "max_relevant_docs_per_query": 1,
148
+ "unique_relevant_docs": 541
149
+ },
150
+ "top_ranked_statistics": null
101
151
  },
102
152
  "et": {
103
- "number_of_characters": 33995,
104
153
  "num_samples": 1654,
105
- "num_queries": 874,
106
- "num_documents": 780,
107
- "min_document_length": 0,
108
- "average_document_length": 0,
109
- "max_document_length": 0,
110
- "unique_documents": 0,
111
- "num_document_images": 780,
112
- "min_query_length": 3,
113
- "average_query_length": 38.89588100686499,
114
- "max_query_length": 588,
115
- "unique_queries": 750,
116
- "num_query_images": 0,
117
- "min_relevant_docs_per_query": 1,
118
- "average_relevant_docs_per_query": 1.0,
119
- "max_relevant_docs_per_query": 1,
120
- "unique_relevant_docs": 780
154
+ "number_of_characters": 33995,
155
+ "documents_text_statistics": null,
156
+ "documents_image_statistics": {
157
+ "min_image_width": 299,
158
+ "average_image_width": 299.98974358974357,
159
+ "max_image_width": 300,
160
+ "min_image_height": 58,
161
+ "average_image_height": 290.76025641025643,
162
+ "max_image_height": 801,
163
+ "unique_images": 780
164
+ },
165
+ "queries_text_statistics": {
166
+ "total_text_length": 33995,
167
+ "min_text_length": 3,
168
+ "average_text_length": 38.89588100686499,
169
+ "max_text_length": 588,
170
+ "unique_texts": 750
171
+ },
172
+ "queries_image_statistics": null,
173
+ "relevant_docs_statistics": {
174
+ "num_relevant_docs": 874,
175
+ "min_relevant_docs_per_query": 1,
176
+ "average_relevant_docs_per_query": 1.0,
177
+ "max_relevant_docs_per_query": 1,
178
+ "unique_relevant_docs": 780
179
+ },
180
+ "top_ranked_statistics": null
121
181
  },
122
182
  "id": {
123
- "number_of_characters": 45428,
124
183
  "num_samples": 1755,
125
- "num_queries": 901,
126
- "num_documents": 854,
127
- "min_document_length": 0,
128
- "average_document_length": 0,
129
- "max_document_length": 0,
130
- "unique_documents": 0,
131
- "num_document_images": 854,
132
- "min_query_length": 1,
133
- "average_query_length": 50.41953385127636,
134
- "max_query_length": 628,
135
- "unique_queries": 863,
136
- "num_query_images": 0,
137
- "min_relevant_docs_per_query": 1,
138
- "average_relevant_docs_per_query": 1.0,
139
- "max_relevant_docs_per_query": 1,
140
- "unique_relevant_docs": 854
184
+ "number_of_characters": 45428,
185
+ "documents_text_statistics": null,
186
+ "documents_image_statistics": {
187
+ "min_image_width": 299,
188
+ "average_image_width": 299.9906323185012,
189
+ "max_image_width": 300,
190
+ "min_image_height": 30,
191
+ "average_image_height": 287.95081967213116,
192
+ "max_image_height": 1043,
193
+ "unique_images": 854
194
+ },
195
+ "queries_text_statistics": {
196
+ "total_text_length": 45428,
197
+ "min_text_length": 1,
198
+ "average_text_length": 50.41953385127636,
199
+ "max_text_length": 628,
200
+ "unique_texts": 863
201
+ },
202
+ "queries_image_statistics": null,
203
+ "relevant_docs_statistics": {
204
+ "num_relevant_docs": 901,
205
+ "min_relevant_docs_per_query": 1,
206
+ "average_relevant_docs_per_query": 1.0,
207
+ "max_relevant_docs_per_query": 1,
208
+ "unique_relevant_docs": 854
209
+ },
210
+ "top_ranked_statistics": null
141
211
  },
142
212
  "ko": {
143
- "number_of_characters": 18304,
144
213
  "num_samples": 1820,
145
- "num_queries": 931,
146
- "num_documents": 889,
147
- "min_document_length": 0,
148
- "average_document_length": 0,
149
- "max_document_length": 0,
150
- "unique_documents": 0,
151
- "num_document_images": 889,
152
- "min_query_length": 2,
153
- "average_query_length": 19.66058002148228,
154
- "max_query_length": 168,
155
- "unique_queries": 905,
156
- "num_query_images": 0,
157
- "min_relevant_docs_per_query": 1,
158
- "average_relevant_docs_per_query": 1.0,
159
- "max_relevant_docs_per_query": 1,
160
- "unique_relevant_docs": 889
214
+ "number_of_characters": 18304,
215
+ "documents_text_statistics": null,
216
+ "documents_image_statistics": {
217
+ "min_image_width": 299,
218
+ "average_image_width": 299.9898762654668,
219
+ "max_image_width": 300,
220
+ "min_image_height": 25,
221
+ "average_image_height": 286.3397075365579,
222
+ "max_image_height": 877,
223
+ "unique_images": 889
224
+ },
225
+ "queries_text_statistics": {
226
+ "total_text_length": 18304,
227
+ "min_text_length": 2,
228
+ "average_text_length": 19.66058002148228,
229
+ "max_text_length": 168,
230
+ "unique_texts": 905
231
+ },
232
+ "queries_image_statistics": null,
233
+ "relevant_docs_statistics": {
234
+ "num_relevant_docs": 931,
235
+ "min_relevant_docs_per_query": 1,
236
+ "average_relevant_docs_per_query": 1.0,
237
+ "max_relevant_docs_per_query": 1,
238
+ "unique_relevant_docs": 889
239
+ },
240
+ "top_ranked_statistics": null
161
241
  },
162
242
  "ja": {
163
- "number_of_characters": 21706,
164
243
  "num_samples": 1842,
165
- "num_queries": 1000,
166
- "num_documents": 842,
167
- "min_document_length": 0,
168
- "average_document_length": 0,
169
- "max_document_length": 0,
170
- "unique_documents": 0,
171
- "num_document_images": 842,
172
- "min_query_length": 2,
173
- "average_query_length": 21.706,
174
- "max_query_length": 368,
175
- "unique_queries": 875,
176
- "num_query_images": 0,
177
- "min_relevant_docs_per_query": 1,
178
- "average_relevant_docs_per_query": 1.0,
179
- "max_relevant_docs_per_query": 1,
180
- "unique_relevant_docs": 842
244
+ "number_of_characters": 21706,
245
+ "documents_text_statistics": null,
246
+ "documents_image_statistics": {
247
+ "min_image_width": 299,
248
+ "average_image_width": 299.98218527315913,
249
+ "max_image_width": 300,
250
+ "min_image_height": 74,
251
+ "average_image_height": 276.1021377672209,
252
+ "max_image_height": 2566,
253
+ "unique_images": 842
254
+ },
255
+ "queries_text_statistics": {
256
+ "total_text_length": 21706,
257
+ "min_text_length": 2,
258
+ "average_text_length": 21.706,
259
+ "max_text_length": 368,
260
+ "unique_texts": 875
261
+ },
262
+ "queries_image_statistics": null,
263
+ "relevant_docs_statistics": {
264
+ "num_relevant_docs": 1000,
265
+ "min_relevant_docs_per_query": 1,
266
+ "average_relevant_docs_per_query": 1.0,
267
+ "max_relevant_docs_per_query": 1,
268
+ "unique_relevant_docs": 842
269
+ },
270
+ "top_ranked_statistics": null
181
271
  },
182
272
  "tr": {
183
- "number_of_characters": 33434,
184
273
  "num_samples": 1402,
185
- "num_queries": 721,
186
- "num_documents": 681,
187
- "min_document_length": 0,
188
- "average_document_length": 0,
189
- "max_document_length": 0,
190
- "unique_documents": 0,
191
- "num_document_images": 681,
192
- "min_query_length": 4,
193
- "average_query_length": 46.37170596393897,
194
- "max_query_length": 408,
195
- "unique_queries": 712,
196
- "num_query_images": 0,
197
- "min_relevant_docs_per_query": 1,
198
- "average_relevant_docs_per_query": 1.0,
199
- "max_relevant_docs_per_query": 1,
200
- "unique_relevant_docs": 681
274
+ "number_of_characters": 33434,
275
+ "documents_text_statistics": null,
276
+ "documents_image_statistics": {
277
+ "min_image_width": 298,
278
+ "average_image_width": 299.9867841409692,
279
+ "max_image_width": 300,
280
+ "min_image_height": 38,
281
+ "average_image_height": 290.3171806167401,
282
+ "max_image_height": 1043,
283
+ "unique_images": 681
284
+ },
285
+ "queries_text_statistics": {
286
+ "total_text_length": 33434,
287
+ "min_text_length": 4,
288
+ "average_text_length": 46.37170596393897,
289
+ "max_text_length": 408,
290
+ "unique_texts": 712
291
+ },
292
+ "queries_image_statistics": null,
293
+ "relevant_docs_statistics": {
294
+ "num_relevant_docs": 721,
295
+ "min_relevant_docs_per_query": 1,
296
+ "average_relevant_docs_per_query": 1.0,
297
+ "max_relevant_docs_per_query": 1,
298
+ "unique_relevant_docs": 681
299
+ },
300
+ "top_ranked_statistics": null
201
301
  },
202
302
  "vi": {
203
- "number_of_characters": 53181,
204
303
  "num_samples": 1815,
205
- "num_queries": 946,
206
- "num_documents": 869,
207
- "min_document_length": 0,
208
- "average_document_length": 0,
209
- "max_document_length": 0,
210
- "unique_documents": 0,
211
- "num_document_images": 869,
212
- "min_query_length": 3,
213
- "average_query_length": 56.21670190274841,
214
- "max_query_length": 476,
215
- "unique_queries": 921,
216
- "num_query_images": 0,
217
- "min_relevant_docs_per_query": 1,
218
- "average_relevant_docs_per_query": 1.0,
219
- "max_relevant_docs_per_query": 1,
220
- "unique_relevant_docs": 869
304
+ "number_of_characters": 53181,
305
+ "documents_text_statistics": null,
306
+ "documents_image_statistics": {
307
+ "min_image_width": 299,
308
+ "average_image_width": 299.98964326812427,
309
+ "max_image_width": 300,
310
+ "min_image_height": 27,
311
+ "average_image_height": 270.2324510932106,
312
+ "max_image_height": 763,
313
+ "unique_images": 869
314
+ },
315
+ "queries_text_statistics": {
316
+ "total_text_length": 53181,
317
+ "min_text_length": 3,
318
+ "average_text_length": 56.21670190274841,
319
+ "max_text_length": 476,
320
+ "unique_texts": 921
321
+ },
322
+ "queries_image_statistics": null,
323
+ "relevant_docs_statistics": {
324
+ "num_relevant_docs": 946,
325
+ "min_relevant_docs_per_query": 1,
326
+ "average_relevant_docs_per_query": 1.0,
327
+ "max_relevant_docs_per_query": 1,
328
+ "unique_relevant_docs": 869
329
+ },
330
+ "top_ranked_statistics": null
221
331
  },
222
332
  "en": {
223
- "number_of_characters": 57978,
224
333
  "num_samples": 1685,
225
- "num_queries": 1000,
226
- "num_documents": 685,
227
- "min_document_length": 0,
228
- "average_document_length": 0,
229
- "max_document_length": 0,
230
- "unique_documents": 0,
231
- "num_document_images": 685,
232
- "min_query_length": 4,
233
- "average_query_length": 57.978,
234
- "max_query_length": 690,
235
- "unique_queries": 895,
236
- "num_query_images": 0,
237
- "min_relevant_docs_per_query": 1,
238
- "average_relevant_docs_per_query": 1.0,
239
- "max_relevant_docs_per_query": 1,
240
- "unique_relevant_docs": 685
334
+ "number_of_characters": 57978,
335
+ "documents_text_statistics": null,
336
+ "documents_image_statistics": {
337
+ "min_image_width": 299,
338
+ "average_image_width": 299.9824817518248,
339
+ "max_image_width": 300,
340
+ "min_image_height": 47,
341
+ "average_image_height": 279.4277372262774,
342
+ "max_image_height": 1274,
343
+ "unique_images": 685
344
+ },
345
+ "queries_text_statistics": {
346
+ "total_text_length": 57978,
347
+ "min_text_length": 4,
348
+ "average_text_length": 57.978,
349
+ "max_text_length": 690,
350
+ "unique_texts": 895
351
+ },
352
+ "queries_image_statistics": null,
353
+ "relevant_docs_statistics": {
354
+ "num_relevant_docs": 1000,
355
+ "min_relevant_docs_per_query": 1,
356
+ "average_relevant_docs_per_query": 1.0,
357
+ "max_relevant_docs_per_query": 1,
358
+ "unique_relevant_docs": 685
359
+ },
360
+ "top_ranked_statistics": null
241
361
  }
242
362
  }
243
363
  }
@@ -1,22 +1,32 @@
1
1
  {
2
2
  "test": {
3
- "number_of_characters": 578340,
4
- "num_samples": 7685,
5
- "num_queries": 697,
6
- "num_documents": 6988,
7
- "min_document_length": 0,
8
- "average_document_length": 82.76187750429307,
9
- "max_document_length": 1026,
10
- "unique_documents": 6939,
11
- "num_document_images": 0,
12
- "min_query_length": 0,
13
- "average_query_length": 0,
14
- "max_query_length": 0,
15
- "unique_queries": 0,
16
- "num_query_images": 697,
17
- "min_relevant_docs_per_query": 1,
18
- "average_relevant_docs_per_query": 1.0,
19
- "max_relevant_docs_per_query": 1,
20
- "unique_relevant_docs": 697
3
+ "num_samples": 7684,
4
+ "number_of_characters": 578436,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 578436,
7
+ "min_text_length": 2,
8
+ "average_text_length": 82.78746243022756,
9
+ "max_text_length": 1026,
10
+ "unique_texts": 6939
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": null,
14
+ "queries_image_statistics": {
15
+ "min_image_width": 160,
16
+ "average_image_width": 578.7604017216643,
17
+ "max_image_width": 2581,
18
+ "min_image_height": 168,
19
+ "average_image_height": 547.1893830703013,
20
+ "max_image_height": 2248,
21
+ "unique_images": 697
22
+ },
23
+ "relevant_docs_statistics": {
24
+ "num_relevant_docs": 697,
25
+ "min_relevant_docs_per_query": 1,
26
+ "average_relevant_docs_per_query": 1.0,
27
+ "max_relevant_docs_per_query": 1,
28
+ "unique_relevant_docs": 697
29
+ },
30
+ "top_ranked_statistics": null
21
31
  }
22
32
  }