mteb 2.1.0__py3-none-any.whl → 2.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. mteb/_create_dataloaders.py +2 -0
  2. mteb/_evaluators/retrieval_metrics.py +0 -9
  3. mteb/abstasks/_stratification.py +1 -1
  4. mteb/abstasks/abstask.py +6 -1
  5. mteb/abstasks/dataset_card_template.md +1 -1
  6. mteb/abstasks/retrieval.py +2 -1
  7. mteb/abstasks/retrieval_dataset_loaders.py +1 -1
  8. mteb/abstasks/task_metadata.py +1 -1
  9. mteb/benchmarks/benchmarks/benchmarks.py +9 -13
  10. mteb/benchmarks/get_benchmark.py +1 -1
  11. mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XFlickr30kCoT2IRetrieval.json +243 -153
  12. mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XM3600T2IRetrieval.json +999 -629
  13. mteb/descriptive_stats/Image/Any2AnyRetrieval/OVENIT2TRetrieval.json +33 -17
  14. mteb/descriptive_stats/Image/DocumentUnderstanding/MIRACLVisionRetrieval.json +574 -0
  15. mteb/descriptive_stats/Retrieval/ClimateFEVERHardNegatives.v2.json +30 -0
  16. mteb/descriptive_stats/Retrieval/DBPediaHardNegatives.v2.json +30 -0
  17. mteb/descriptive_stats/Retrieval/FEVERHardNegatives.v2.json +30 -0
  18. mteb/descriptive_stats/Retrieval/HotpotQAHardNegatives.v2.json +30 -0
  19. mteb/descriptive_stats/Retrieval/QuoraRetrievalHardNegatives.v2.json +30 -0
  20. mteb/descriptive_stats/Retrieval/RiaNewsRetrievalHardNegatives.v2.json +30 -0
  21. mteb/descriptive_stats/Retrieval/VDRMultilingualRetrieval.json +184 -0
  22. mteb/languages/check_language_code.py +11 -3
  23. mteb/languages/language_scripts.py +4 -0
  24. mteb/leaderboard/app.py +1 -1
  25. mteb/leaderboard/benchmark_selector.py +1 -0
  26. mteb/leaderboard/text_segments.py +1 -1
  27. mteb/models/model_implementations/b1ade_models.py +1 -1
  28. mteb/models/model_implementations/bge_models.py +1 -3
  29. mteb/models/model_implementations/bmretriever_models.py +1 -1
  30. mteb/models/model_implementations/gme_v_models.py +2 -2
  31. mteb/models/model_implementations/ibm_granite_models.py +1 -1
  32. mteb/models/model_implementations/inf_models.py +3 -3
  33. mteb/models/model_implementations/jina_models.py +12 -2
  34. mteb/models/model_implementations/llm2vec_models.py +1 -1
  35. mteb/models/model_implementations/misc_models.py +2 -2
  36. mteb/models/model_implementations/mxbai_models.py +1 -1
  37. mteb/models/model_implementations/reasonir_model.py +1 -1
  38. mteb/models/model_implementations/salesforce_models.py +1 -1
  39. mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -1
  40. mteb/models/model_implementations/voyage_v.py +9 -9
  41. mteb/results/task_result.py +6 -8
  42. mteb/tasks/classification/dan/angry_tweets_classification.py +2 -2
  43. mteb/tasks/classification/eng/legal_bench_classification.py +3 -3
  44. mteb/tasks/classification/mya/myanmar_news.py +2 -2
  45. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  46. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  47. mteb/tasks/pair_classification/multilingual/indic_xnli_pair_classification.py +9 -8
  48. mteb/tasks/retrieval/code/code_rag.py +8 -8
  49. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  50. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  51. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  52. mteb/tasks/retrieval/eng/__init__.py +18 -4
  53. mteb/tasks/retrieval/eng/climate_fever_retrieval.py +68 -77
  54. mteb/tasks/retrieval/eng/dbpedia_retrieval.py +55 -50
  55. mteb/tasks/retrieval/eng/fever_retrieval.py +62 -67
  56. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +0 -4
  57. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +0 -4
  58. mteb/tasks/retrieval/eng/hotpot_qa_retrieval.py +57 -67
  59. mteb/tasks/retrieval/eng/legal_summarization_retrieval.py +1 -1
  60. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +0 -3
  61. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +0 -2
  62. mteb/tasks/retrieval/eng/oven_it2t_retrieval.py +1 -1
  63. mteb/tasks/retrieval/eng/quora_retrieval.py +51 -46
  64. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +0 -4
  65. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +0 -4
  66. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +0 -2
  67. mteb/tasks/retrieval/jpn/ja_gov_faqs_retrieval.py +1 -1
  68. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +1 -1
  69. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +0 -2
  70. mteb/tasks/retrieval/multilingual/miracl_retrieval.py +1 -1
  71. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +2 -9
  72. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +0 -2
  73. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +0 -2
  74. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +6 -5
  75. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +3 -4
  76. mteb/tasks/retrieval/nob/norquad.py +2 -2
  77. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  78. mteb/tasks/retrieval/rus/__init__.py +11 -2
  79. mteb/tasks/retrieval/rus/ria_news_retrieval.py +48 -44
  80. mteb/tasks/retrieval/tur/tur_hist_quad.py +2 -2
  81. {mteb-2.1.0.dist-info → mteb-2.1.2.dist-info}/METADATA +5 -5
  82. {mteb-2.1.0.dist-info → mteb-2.1.2.dist-info}/RECORD +86 -91
  83. mteb/descriptive_stats/Classification/PersianTextTone.json +0 -56
  84. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchCount.json +0 -37
  85. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDepth.json +0 -25
  86. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDistance.json +0 -25
  87. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchRelation.json +0 -25
  88. mteb/descriptive_stats/Image/VisualSTS/STS12VisualSTS.json +0 -20
  89. mteb/descriptive_stats/Image/VisualSTS/STS13VisualSTS.json +0 -20
  90. mteb/descriptive_stats/Image/VisualSTS/STS14VisualSTS.json +0 -20
  91. mteb/descriptive_stats/Image/VisualSTS/STS15VisualSTS.json +0 -20
  92. mteb/descriptive_stats/Image/VisualSTS/STS16VisualSTS.json +0 -20
  93. mteb/descriptive_stats/Image/VisualSTS/STS17MultilingualVisualSTS.json +0 -220
  94. mteb/descriptive_stats/Image/VisualSTS/STSBenchmarkMultilingualVisualSTS.json +0 -402
  95. mteb/descriptive_stats/Reranking/InstructIR.json +0 -31
  96. {mteb-2.1.0.dist-info → mteb-2.1.2.dist-info}/WHEEL +0 -0
  97. {mteb-2.1.0.dist-info → mteb-2.1.2.dist-info}/entry_points.txt +0 -0
  98. {mteb-2.1.0.dist-info → mteb-2.1.2.dist-info}/licenses/LICENSE +0 -0
  99. {mteb-2.1.0.dist-info → mteb-2.1.2.dist-info}/top_level.txt +0 -0
@@ -1,22 +1,38 @@
1
1
  {
2
2
  "test": {
3
- "number_of_characters": 356548734,
4
3
  "num_samples": 726671,
5
- "num_queries": 50004,
6
- "num_documents": 676667,
7
- "min_document_length": 2,
8
- "average_document_length": 524.4973081294048,
9
- "max_document_length": 60546,
10
- "unique_documents": 676667,
11
- "num_document_images": 0,
12
- "min_query_length": 11,
13
- "average_query_length": 32.771658267338616,
14
- "max_query_length": 78,
15
- "unique_queries": 1540,
16
- "num_query_images": 50004,
17
- "min_relevant_docs_per_query": 1,
18
- "average_relevant_docs_per_query": 9.852291816654668,
19
- "max_relevant_docs_per_query": 174,
20
- "unique_relevant_docs": 24471
4
+ "number_of_characters": 356548734,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 354910020,
7
+ "min_text_length": 2,
8
+ "average_text_length": 524.4973081294048,
9
+ "max_text_length": 60546,
10
+ "unique_texts": 676667
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 1638714,
15
+ "min_text_length": 11,
16
+ "average_text_length": 32.771658267338616,
17
+ "max_text_length": 78,
18
+ "unique_texts": 1540
19
+ },
20
+ "queries_image_statistics": {
21
+ "min_image_width": 256,
22
+ "average_image_width": 285.66400687944963,
23
+ "max_image_width": 1365,
24
+ "min_image_height": 256,
25
+ "average_image_height": 266.91002719782415,
26
+ "max_image_height": 1079,
27
+ "unique_images": 49917
28
+ },
29
+ "relevant_docs_statistics": {
30
+ "num_relevant_docs": 492654,
31
+ "min_relevant_docs_per_query": 1,
32
+ "average_relevant_docs_per_query": 9.852291816654668,
33
+ "max_relevant_docs_per_query": 174,
34
+ "unique_relevant_docs": 24471
35
+ },
36
+ "top_ranked_statistics": null
21
37
  }
22
38
  }
@@ -0,0 +1,574 @@
1
+ {
2
+ "default": {
3
+ "num_samples": 346632,
4
+ "number_of_characters": 277317,
5
+ "documents_text_statistics": null,
6
+ "documents_image_statistics": {
7
+ "min_image_width": 980,
8
+ "average_image_width": 980.0003660689508,
9
+ "max_image_width": 1104,
10
+ "min_image_height": 980,
11
+ "average_image_height": 980.0024650610803,
12
+ "max_image_height": 1815,
13
+ "unique_images": 338136
14
+ },
15
+ "queries_text_statistics": {
16
+ "total_text_length": 277317,
17
+ "min_text_length": 5,
18
+ "average_text_length": 35.11230691314257,
19
+ "max_text_length": 176,
20
+ "unique_texts": 7894
21
+ },
22
+ "queries_image_statistics": null,
23
+ "relevant_docs_statistics": {
24
+ "num_relevant_docs": 10094,
25
+ "min_relevant_docs_per_query": 1,
26
+ "average_relevant_docs_per_query": 1.2780450747024563,
27
+ "max_relevant_docs_per_query": 10,
28
+ "unique_relevant_docs": 9411
29
+ },
30
+ "top_ranked_statistics": null,
31
+ "hf_subset_descriptive_stats": {
32
+ "ar": {
33
+ "num_samples": 77571,
34
+ "number_of_characters": 60214,
35
+ "documents_text_statistics": null,
36
+ "documents_image_statistics": {
37
+ "min_image_width": 980,
38
+ "average_image_width": 980.0,
39
+ "max_image_width": 980,
40
+ "min_image_height": 980,
41
+ "average_image_height": 980.0,
42
+ "max_image_height": 980,
43
+ "unique_images": 75393
44
+ },
45
+ "queries_text_statistics": {
46
+ "total_text_length": 60214,
47
+ "min_text_length": 12,
48
+ "average_text_length": 28.309355900329102,
49
+ "max_text_length": 101,
50
+ "unique_texts": 2127
51
+ },
52
+ "queries_image_statistics": null,
53
+ "relevant_docs_statistics": {
54
+ "num_relevant_docs": 2558,
55
+ "min_relevant_docs_per_query": 1,
56
+ "average_relevant_docs_per_query": 1.2026328161730135,
57
+ "max_relevant_docs_per_query": 9,
58
+ "unique_relevant_docs": 2328
59
+ },
60
+ "top_ranked_statistics": null
61
+ },
62
+ "bn": {
63
+ "num_samples": 8724,
64
+ "number_of_characters": 10743,
65
+ "documents_text_statistics": null,
66
+ "documents_image_statistics": {
67
+ "min_image_width": 980,
68
+ "average_image_width": 980.0,
69
+ "max_image_width": 980,
70
+ "min_image_height": 980,
71
+ "average_image_height": 980.0,
72
+ "max_image_height": 980,
73
+ "unique_images": 8495
74
+ },
75
+ "queries_text_statistics": {
76
+ "total_text_length": 10743,
77
+ "min_text_length": 16,
78
+ "average_text_length": 46.91266375545852,
79
+ "max_text_length": 112,
80
+ "unique_texts": 229
81
+ },
82
+ "queries_image_statistics": null,
83
+ "relevant_docs_statistics": {
84
+ "num_relevant_docs": 328,
85
+ "min_relevant_docs_per_query": 1,
86
+ "average_relevant_docs_per_query": 1.4323144104803494,
87
+ "max_relevant_docs_per_query": 7,
88
+ "unique_relevant_docs": 289
89
+ },
90
+ "top_ranked_statistics": null
91
+ },
92
+ "de": {
93
+ "num_samples": 6431,
94
+ "number_of_characters": 5647,
95
+ "documents_text_statistics": null,
96
+ "documents_image_statistics": {
97
+ "min_image_width": 980,
98
+ "average_image_width": 980.0,
99
+ "max_image_width": 980,
100
+ "min_image_height": 980,
101
+ "average_image_height": 980.0,
102
+ "max_image_height": 980,
103
+ "unique_images": 6268
104
+ },
105
+ "queries_text_statistics": {
106
+ "total_text_length": 5647,
107
+ "min_text_length": 15,
108
+ "average_text_length": 43.775193798449614,
109
+ "max_text_length": 86,
110
+ "unique_texts": 128
111
+ },
112
+ "queries_image_statistics": null,
113
+ "relevant_docs_statistics": {
114
+ "num_relevant_docs": 177,
115
+ "min_relevant_docs_per_query": 1,
116
+ "average_relevant_docs_per_query": 1.372093023255814,
117
+ "max_relevant_docs_per_query": 6,
118
+ "unique_relevant_docs": 175
119
+ },
120
+ "top_ranked_statistics": null
121
+ },
122
+ "en": {
123
+ "num_samples": 43418,
124
+ "number_of_characters": 18094,
125
+ "documents_text_statistics": null,
126
+ "documents_image_statistics": {
127
+ "min_image_width": 980,
128
+ "average_image_width": 980.0,
129
+ "max_image_width": 980,
130
+ "min_image_height": 980,
131
+ "average_image_height": 980.0,
132
+ "max_image_height": 980,
133
+ "unique_images": 42765
134
+ },
135
+ "queries_text_statistics": {
136
+ "total_text_length": 18094,
137
+ "min_text_length": 18,
138
+ "average_text_length": 40.47874720357942,
139
+ "max_text_length": 122,
140
+ "unique_texts": 447
141
+ },
142
+ "queries_image_statistics": null,
143
+ "relevant_docs_statistics": {
144
+ "num_relevant_docs": 623,
145
+ "min_relevant_docs_per_query": 1,
146
+ "average_relevant_docs_per_query": 1.3937360178970917,
147
+ "max_relevant_docs_per_query": 7,
148
+ "unique_relevant_docs": 607
149
+ },
150
+ "top_ranked_statistics": null
151
+ },
152
+ "es": {
153
+ "num_samples": 18118,
154
+ "number_of_characters": 17104,
155
+ "documents_text_statistics": null,
156
+ "documents_image_statistics": {
157
+ "min_image_width": 980,
158
+ "average_image_width": 980.0,
159
+ "max_image_width": 980,
160
+ "min_image_height": 980,
161
+ "average_image_height": 980.0,
162
+ "max_image_height": 980,
163
+ "unique_images": 17711
164
+ },
165
+ "queries_text_statistics": {
166
+ "total_text_length": 17104,
167
+ "min_text_length": 19,
168
+ "average_text_length": 46.35230352303523,
169
+ "max_text_length": 84,
170
+ "unique_texts": 369
171
+ },
172
+ "queries_image_statistics": null,
173
+ "relevant_docs_statistics": {
174
+ "num_relevant_docs": 595,
175
+ "min_relevant_docs_per_query": 1,
176
+ "average_relevant_docs_per_query": 1.6124661246612466,
177
+ "max_relevant_docs_per_query": 7,
178
+ "unique_relevant_docs": 594
179
+ },
180
+ "top_ranked_statistics": null
181
+ },
182
+ "fa": {
183
+ "num_samples": 16188,
184
+ "number_of_characters": 14282,
185
+ "documents_text_statistics": null,
186
+ "documents_image_statistics": {
187
+ "min_image_width": 980,
188
+ "average_image_width": 980.0,
189
+ "max_image_width": 980,
190
+ "min_image_height": 980,
191
+ "average_image_height": 980.0,
192
+ "max_image_height": 980,
193
+ "unique_images": 15846
194
+ },
195
+ "queries_text_statistics": {
196
+ "total_text_length": 14282,
197
+ "min_text_length": 18,
198
+ "average_text_length": 41.760233918128655,
199
+ "max_text_length": 82,
200
+ "unique_texts": 341
201
+ },
202
+ "queries_image_statistics": null,
203
+ "relevant_docs_statistics": {
204
+ "num_relevant_docs": 464,
205
+ "min_relevant_docs_per_query": 1,
206
+ "average_relevant_docs_per_query": 1.3567251461988303,
207
+ "max_relevant_docs_per_query": 7,
208
+ "unique_relevant_docs": 455
209
+ },
210
+ "top_ranked_statistics": null
211
+ },
212
+ "fi": {
213
+ "num_samples": 34470,
214
+ "number_of_characters": 28921,
215
+ "documents_text_statistics": null,
216
+ "documents_image_statistics": {
217
+ "min_image_width": 980,
218
+ "average_image_width": 980.0,
219
+ "max_image_width": 980,
220
+ "min_image_height": 980,
221
+ "average_image_height": 980.0,
222
+ "max_image_height": 980,
223
+ "unique_images": 33613
224
+ },
225
+ "queries_text_statistics": {
226
+ "total_text_length": 28921,
227
+ "min_text_length": 14,
228
+ "average_text_length": 36.56257901390645,
229
+ "max_text_length": 130,
230
+ "unique_texts": 791
231
+ },
232
+ "queries_image_statistics": null,
233
+ "relevant_docs_statistics": {
234
+ "num_relevant_docs": 876,
235
+ "min_relevant_docs_per_query": 1,
236
+ "average_relevant_docs_per_query": 1.1074589127686474,
237
+ "max_relevant_docs_per_query": 4,
238
+ "unique_relevant_docs": 828
239
+ },
240
+ "top_ranked_statistics": null
241
+ },
242
+ "fr": {
243
+ "num_samples": 7132,
244
+ "number_of_characters": 5797,
245
+ "documents_text_statistics": null,
246
+ "documents_image_statistics": {
247
+ "min_image_width": 980,
248
+ "average_image_width": 980.0,
249
+ "max_image_width": 980,
250
+ "min_image_height": 980,
251
+ "average_image_height": 980.0,
252
+ "max_image_height": 980,
253
+ "unique_images": 6987
254
+ },
255
+ "queries_text_statistics": {
256
+ "total_text_length": 5797,
257
+ "min_text_length": 16,
258
+ "average_text_length": 40.82394366197183,
259
+ "max_text_length": 74,
260
+ "unique_texts": 142
261
+ },
262
+ "queries_image_statistics": null,
263
+ "relevant_docs_statistics": {
264
+ "num_relevant_docs": 170,
265
+ "min_relevant_docs_per_query": 1,
266
+ "average_relevant_docs_per_query": 1.1971830985915493,
267
+ "max_relevant_docs_per_query": 4,
268
+ "unique_relevant_docs": 170
269
+ },
270
+ "top_ranked_statistics": null
271
+ },
272
+ "hi": {
273
+ "num_samples": 8188,
274
+ "number_of_characters": 9480,
275
+ "documents_text_statistics": null,
276
+ "documents_image_statistics": {
277
+ "min_image_width": 980,
278
+ "average_image_width": 980.0,
279
+ "max_image_width": 980,
280
+ "min_image_height": 980,
281
+ "average_image_height": 980.0,
282
+ "max_image_height": 980,
283
+ "unique_images": 8002
284
+ },
285
+ "queries_text_statistics": {
286
+ "total_text_length": 9480,
287
+ "min_text_length": 24,
288
+ "average_text_length": 51.52173913043478,
289
+ "max_text_length": 116,
290
+ "unique_texts": 184
291
+ },
292
+ "queries_image_statistics": null,
293
+ "relevant_docs_statistics": {
294
+ "num_relevant_docs": 228,
295
+ "min_relevant_docs_per_query": 1,
296
+ "average_relevant_docs_per_query": 1.2391304347826086,
297
+ "max_relevant_docs_per_query": 4,
298
+ "unique_relevant_docs": 220
299
+ },
300
+ "top_ranked_statistics": null
301
+ },
302
+ "id": {
303
+ "num_samples": 24445,
304
+ "number_of_characters": 22513,
305
+ "documents_text_statistics": null,
306
+ "documents_image_statistics": {
307
+ "min_image_width": 980,
308
+ "average_image_width": 980.0,
309
+ "max_image_width": 980,
310
+ "min_image_height": 980,
311
+ "average_image_height": 980.0,
312
+ "max_image_height": 980,
313
+ "unique_images": 23821
314
+ },
315
+ "queries_text_statistics": {
316
+ "total_text_length": 22513,
317
+ "min_text_length": 13,
318
+ "average_text_length": 37.33499170812603,
319
+ "max_text_length": 93,
320
+ "unique_texts": 603
321
+ },
322
+ "queries_image_statistics": null,
323
+ "relevant_docs_statistics": {
324
+ "num_relevant_docs": 937,
325
+ "min_relevant_docs_per_query": 1,
326
+ "average_relevant_docs_per_query": 1.5538971807628523,
327
+ "max_relevant_docs_per_query": 9,
328
+ "unique_relevant_docs": 855
329
+ },
330
+ "top_ranked_statistics": null
331
+ },
332
+ "ja": {
333
+ "num_samples": 18296,
334
+ "number_of_characters": 6822,
335
+ "documents_text_statistics": null,
336
+ "documents_image_statistics": {
337
+ "min_image_width": 980,
338
+ "average_image_width": 980.0,
339
+ "max_image_width": 980,
340
+ "min_image_height": 980,
341
+ "average_image_height": 980.0,
342
+ "max_image_height": 980,
343
+ "unique_images": 17883
344
+ },
345
+ "queries_text_statistics": {
346
+ "total_text_length": 6822,
347
+ "min_text_length": 8,
348
+ "average_text_length": 17.627906976744185,
349
+ "max_text_length": 42,
350
+ "unique_texts": 387
351
+ },
352
+ "queries_image_statistics": null,
353
+ "relevant_docs_statistics": {
354
+ "num_relevant_docs": 489,
355
+ "min_relevant_docs_per_query": 1,
356
+ "average_relevant_docs_per_query": 1.2635658914728682,
357
+ "max_relevant_docs_per_query": 7,
358
+ "unique_relevant_docs": 465
359
+ },
360
+ "top_ranked_statistics": null
361
+ },
362
+ "ko": {
363
+ "num_samples": 5830,
364
+ "number_of_characters": 2549,
365
+ "documents_text_statistics": null,
366
+ "documents_image_statistics": {
367
+ "min_image_width": 980,
368
+ "average_image_width": 980.0,
369
+ "max_image_width": 980,
370
+ "min_image_height": 980,
371
+ "average_image_height": 980.0,
372
+ "max_image_height": 980,
373
+ "unique_images": 5700
374
+ },
375
+ "queries_text_statistics": {
376
+ "total_text_length": 2549,
377
+ "min_text_length": 5,
378
+ "average_text_length": 19.607692307692307,
379
+ "max_text_length": 92,
380
+ "unique_texts": 130
381
+ },
382
+ "queries_image_statistics": null,
383
+ "relevant_docs_statistics": {
384
+ "num_relevant_docs": 242,
385
+ "min_relevant_docs_per_query": 1,
386
+ "average_relevant_docs_per_query": 1.8615384615384616,
387
+ "max_relevant_docs_per_query": 9,
388
+ "unique_relevant_docs": 216
389
+ },
390
+ "top_ranked_statistics": null
391
+ },
392
+ "ru": {
393
+ "num_samples": 25765,
394
+ "number_of_characters": 23959,
395
+ "documents_text_statistics": null,
396
+ "documents_image_statistics": {
397
+ "min_image_width": 980,
398
+ "average_image_width": 980.004920439665,
399
+ "max_image_width": 1104,
400
+ "min_image_height": 980,
401
+ "average_image_height": 980.0331336058093,
402
+ "max_image_height": 1815,
403
+ "unique_images": 25182
404
+ },
405
+ "queries_text_statistics": {
406
+ "total_text_length": 23959,
407
+ "min_text_length": 15,
408
+ "average_text_length": 42.480496453900706,
409
+ "max_text_length": 108,
410
+ "unique_texts": 564
411
+ },
412
+ "queries_image_statistics": null,
413
+ "relevant_docs_statistics": {
414
+ "num_relevant_docs": 695,
415
+ "min_relevant_docs_per_query": 1,
416
+ "average_relevant_docs_per_query": 1.2322695035460993,
417
+ "max_relevant_docs_per_query": 7,
418
+ "unique_relevant_docs": 660
419
+ },
420
+ "top_ranked_statistics": null
421
+ },
422
+ "sw": {
423
+ "num_samples": 7405,
424
+ "number_of_characters": 8971,
425
+ "documents_text_statistics": null,
426
+ "documents_image_statistics": {
427
+ "min_image_width": 980,
428
+ "average_image_width": 980.0,
429
+ "max_image_width": 980,
430
+ "min_image_height": 980,
431
+ "average_image_height": 980.0,
432
+ "max_image_height": 980,
433
+ "unique_images": 7164
434
+ },
435
+ "queries_text_statistics": {
436
+ "total_text_length": 8971,
437
+ "min_text_length": 13,
438
+ "average_text_length": 37.53556485355649,
439
+ "max_text_length": 80,
440
+ "unique_texts": 239
441
+ },
442
+ "queries_image_statistics": null,
443
+ "relevant_docs_statistics": {
444
+ "num_relevant_docs": 290,
445
+ "min_relevant_docs_per_query": 1,
446
+ "average_relevant_docs_per_query": 1.213389121338912,
447
+ "max_relevant_docs_per_query": 7,
448
+ "unique_relevant_docs": 240
449
+ },
450
+ "top_ranked_statistics": null
451
+ },
452
+ "te": {
453
+ "num_samples": 15909,
454
+ "number_of_characters": 17919,
455
+ "documents_text_statistics": null,
456
+ "documents_image_statistics": {
457
+ "min_image_width": 980,
458
+ "average_image_width": 980.0,
459
+ "max_image_width": 980,
460
+ "min_image_height": 980,
461
+ "average_image_height": 980.0,
462
+ "max_image_height": 980,
463
+ "unique_images": 15313
464
+ },
465
+ "queries_text_statistics": {
466
+ "total_text_length": 17919,
467
+ "min_text_length": 14,
468
+ "average_text_length": 37.33125,
469
+ "max_text_length": 85,
470
+ "unique_texts": 480
471
+ },
472
+ "queries_image_statistics": null,
473
+ "relevant_docs_statistics": {
474
+ "num_relevant_docs": 490,
475
+ "min_relevant_docs_per_query": 1,
476
+ "average_relevant_docs_per_query": 1.0208333333333333,
477
+ "max_relevant_docs_per_query": 3,
478
+ "unique_relevant_docs": 452
479
+ },
480
+ "top_ranked_statistics": null
481
+ },
482
+ "th": {
483
+ "num_samples": 16764,
484
+ "number_of_characters": 18707,
485
+ "documents_text_statistics": null,
486
+ "documents_image_statistics": {
487
+ "min_image_width": 980,
488
+ "average_image_width": 980.0,
489
+ "max_image_width": 980,
490
+ "min_image_height": 980,
491
+ "average_image_height": 980.0,
492
+ "max_image_height": 980,
493
+ "unique_images": 16307
494
+ },
495
+ "queries_text_statistics": {
496
+ "total_text_length": 18707,
497
+ "min_text_length": 14,
498
+ "average_text_length": 41.47893569844789,
499
+ "max_text_length": 176,
500
+ "unique_texts": 451
501
+ },
502
+ "queries_image_statistics": null,
503
+ "relevant_docs_statistics": {
504
+ "num_relevant_docs": 555,
505
+ "min_relevant_docs_per_query": 1,
506
+ "average_relevant_docs_per_query": 1.23059866962306,
507
+ "max_relevant_docs_per_query": 6,
508
+ "unique_relevant_docs": 495
509
+ },
510
+ "top_ranked_statistics": null
511
+ },
512
+ "yo": {
513
+ "num_samples": 3117,
514
+ "number_of_characters": 3510,
515
+ "documents_text_statistics": null,
516
+ "documents_image_statistics": {
517
+ "min_image_width": 980,
518
+ "average_image_width": 980.0,
519
+ "max_image_width": 980,
520
+ "min_image_height": 980,
521
+ "average_image_height": 980.0,
522
+ "max_image_height": 980,
523
+ "unique_images": 3017
524
+ },
525
+ "queries_text_statistics": {
526
+ "total_text_length": 3510,
527
+ "min_text_length": 25,
528
+ "average_text_length": 36.94736842105263,
529
+ "max_text_length": 56,
530
+ "unique_texts": 95
531
+ },
532
+ "queries_image_statistics": null,
533
+ "relevant_docs_statistics": {
534
+ "num_relevant_docs": 108,
535
+ "min_relevant_docs_per_query": 1,
536
+ "average_relevant_docs_per_query": 1.1368421052631579,
537
+ "max_relevant_docs_per_query": 4,
538
+ "unique_relevant_docs": 104
539
+ },
540
+ "top_ranked_statistics": null
541
+ },
542
+ "zh": {
543
+ "num_samples": 8861,
544
+ "number_of_characters": 2085,
545
+ "documents_text_statistics": null,
546
+ "documents_image_statistics": {
547
+ "min_image_width": 980,
548
+ "average_image_width": 980.0,
549
+ "max_image_width": 980,
550
+ "min_image_height": 980,
551
+ "average_image_height": 980.0,
552
+ "max_image_height": 980,
553
+ "unique_images": 8669
554
+ },
555
+ "queries_text_statistics": {
556
+ "total_text_length": 2085,
557
+ "min_text_length": 7,
558
+ "average_text_length": 11.031746031746032,
559
+ "max_text_length": 20,
560
+ "unique_texts": 187
561
+ },
562
+ "queries_image_statistics": null,
563
+ "relevant_docs_statistics": {
564
+ "num_relevant_docs": 269,
565
+ "min_relevant_docs_per_query": 1,
566
+ "average_relevant_docs_per_query": 1.4232804232804233,
567
+ "max_relevant_docs_per_query": 10,
568
+ "unique_relevant_docs": 258
569
+ },
570
+ "top_ranked_statistics": null
571
+ }
572
+ }
573
+ }
574
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 48416,
4
+ "number_of_characters": 59218442,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 59096563,
7
+ "min_text_length": 1,
8
+ "average_text_length": 1246.3422262527417,
9
+ "max_text_length": 36320,
10
+ "unique_texts": 47416
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 121879,
15
+ "min_text_length": 29,
16
+ "average_text_length": 121.879,
17
+ "max_text_length": 406,
18
+ "unique_texts": 1000
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 3048,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 3.048,
25
+ "max_relevant_docs_per_query": 5,
26
+ "unique_relevant_docs": 1042
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }