mteb 2.0.5__py3-none-any.whl → 2.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. mteb/__init__.py +10 -1
  2. mteb/_create_dataloaders.py +2 -0
  3. mteb/abstasks/_stratification.py +1 -1
  4. mteb/abstasks/abstask.py +6 -1
  5. mteb/abstasks/dataset_card_template.md +1 -1
  6. mteb/abstasks/retrieval.py +2 -1
  7. mteb/abstasks/retrieval_dataset_loaders.py +1 -1
  8. mteb/abstasks/task_metadata.py +1 -1
  9. mteb/benchmarks/benchmarks/__init__.py +2 -0
  10. mteb/benchmarks/benchmarks/benchmarks.py +82 -11
  11. mteb/benchmarks/get_benchmark.py +1 -1
  12. mteb/descriptive_stats/Classification/DutchColaClassification.json +54 -0
  13. mteb/descriptive_stats/Classification/DutchGovernmentBiasClassification.json +54 -0
  14. mteb/descriptive_stats/Classification/DutchNewsArticlesClassification.json +90 -0
  15. mteb/descriptive_stats/Classification/DutchSarcasticHeadlinesClassification.json +54 -0
  16. mteb/descriptive_stats/Classification/IconclassClassification.json +96 -0
  17. mteb/descriptive_stats/Classification/OpenTenderClassification.json +222 -0
  18. mteb/descriptive_stats/Classification/VaccinChatNLClassification.json +1068 -0
  19. mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringP2P.json +45 -0
  20. mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringS2S.json +45 -0
  21. mteb/descriptive_stats/Clustering/IconclassClusteringS2S.json +48 -0
  22. mteb/descriptive_stats/Clustering/OpenTenderClusteringP2P.json +111 -0
  23. mteb/descriptive_stats/Clustering/OpenTenderClusteringS2S.json +111 -0
  24. mteb/descriptive_stats/Clustering/VABBClusteringP2P.json +60 -0
  25. mteb/descriptive_stats/Clustering/VABBClusteringS2S.json +60 -0
  26. mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XFlickr30kCoT2IRetrieval.json +243 -153
  27. mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/XM3600T2IRetrieval.json +999 -629
  28. mteb/descriptive_stats/Image/Any2AnyRetrieval/OVENIT2TRetrieval.json +33 -17
  29. mteb/descriptive_stats/Image/DocumentUnderstanding/MIRACLVisionRetrieval.json +574 -0
  30. mteb/descriptive_stats/MultilabelClassification/CovidDisinformationNLMultiLabelClassification.json +84 -0
  31. mteb/descriptive_stats/MultilabelClassification/VABBMultiLabelClassification.json +156 -0
  32. mteb/descriptive_stats/PairClassification/SICKNLPairClassification.json +35 -0
  33. mteb/descriptive_stats/PairClassification/XLWICNLPairClassification.json +35 -0
  34. mteb/descriptive_stats/Retrieval/ClimateFEVERHardNegatives.v2.json +30 -0
  35. mteb/descriptive_stats/Retrieval/DBPediaHardNegatives.v2.json +30 -0
  36. mteb/descriptive_stats/Retrieval/DutchNewsArticlesRetrieval.json +30 -0
  37. mteb/descriptive_stats/Retrieval/FEVERHardNegatives.v2.json +30 -0
  38. mteb/descriptive_stats/Retrieval/HotpotQAHardNegatives.v2.json +30 -0
  39. mteb/descriptive_stats/Retrieval/LegalQANLRetrieval.json +30 -0
  40. mteb/descriptive_stats/Retrieval/OpenTenderRetrieval.json +30 -0
  41. mteb/descriptive_stats/Retrieval/QuoraRetrievalHardNegatives.v2.json +30 -0
  42. mteb/descriptive_stats/Retrieval/RiaNewsRetrievalHardNegatives.v2.json +30 -0
  43. mteb/descriptive_stats/Retrieval/VABBRetrieval.json +30 -0
  44. mteb/descriptive_stats/Retrieval/VDRMultilingualRetrieval.json +184 -0
  45. mteb/descriptive_stats/Retrieval/bBSARDNLRetrieval.json +30 -0
  46. mteb/descriptive_stats/STS/SICK-NL-STS.json +28 -0
  47. mteb/languages/check_language_code.py +11 -3
  48. mteb/languages/language_scripts.py +4 -0
  49. mteb/leaderboard/text_segments.py +1 -1
  50. mteb/models/model_implementations/b1ade_models.py +1 -1
  51. mteb/models/model_implementations/bge_models.py +1 -3
  52. mteb/models/model_implementations/bmretriever_models.py +1 -1
  53. mteb/models/model_implementations/gme_v_models.py +2 -2
  54. mteb/models/model_implementations/ibm_granite_models.py +1 -1
  55. mteb/models/model_implementations/inf_models.py +3 -3
  56. mteb/models/model_implementations/jina_models.py +12 -2
  57. mteb/models/model_implementations/llm2vec_models.py +1 -1
  58. mteb/models/model_implementations/misc_models.py +2 -2
  59. mteb/models/model_implementations/mxbai_models.py +1 -1
  60. mteb/models/model_implementations/salesforce_models.py +1 -1
  61. mteb/models/model_implementations/seed_1_6_embedding_models.py +1 -1
  62. mteb/models/model_implementations/voyage_v.py +9 -9
  63. mteb/results/task_result.py +6 -8
  64. mteb/tasks/classification/dan/angry_tweets_classification.py +2 -2
  65. mteb/tasks/classification/eng/legal_bench_classification.py +3 -3
  66. mteb/tasks/classification/mya/myanmar_news.py +2 -2
  67. mteb/tasks/classification/nld/__init__.py +16 -0
  68. mteb/tasks/classification/nld/dutch_cola_classification.py +38 -0
  69. mteb/tasks/classification/nld/dutch_government_bias_classification.py +37 -0
  70. mteb/tasks/classification/nld/dutch_news_articles_classification.py +30 -0
  71. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +36 -0
  72. mteb/tasks/classification/nld/iconclass_classification.py +41 -0
  73. mteb/tasks/classification/nld/open_tender_classification.py +38 -0
  74. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +46 -0
  75. mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
  76. mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
  77. mteb/tasks/clustering/__init__.py +1 -0
  78. mteb/tasks/clustering/nld/__init__.py +17 -0
  79. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +37 -0
  80. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +37 -0
  81. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +47 -0
  82. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +51 -0
  83. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +41 -0
  84. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +51 -0
  85. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +51 -0
  86. mteb/tasks/multilabel_classification/__init__.py +1 -0
  87. mteb/tasks/multilabel_classification/nld/__init__.py +9 -0
  88. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +88 -0
  89. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +44 -0
  90. mteb/tasks/pair_classification/__init__.py +1 -0
  91. mteb/tasks/pair_classification/multilingual/indic_xnli_pair_classification.py +9 -8
  92. mteb/tasks/pair_classification/nld/__init__.py +7 -0
  93. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +36 -0
  94. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +41 -0
  95. mteb/tasks/retrieval/code/code_rag.py +8 -8
  96. mteb/tasks/retrieval/dan/dan_fever_retrieval.py +1 -1
  97. mteb/tasks/retrieval/dan/tv2_nordretrieval.py +2 -2
  98. mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +2 -2
  99. mteb/tasks/retrieval/eng/__init__.py +18 -4
  100. mteb/tasks/retrieval/eng/climate_fever_retrieval.py +68 -77
  101. mteb/tasks/retrieval/eng/dbpedia_retrieval.py +55 -50
  102. mteb/tasks/retrieval/eng/fever_retrieval.py +62 -67
  103. mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +0 -4
  104. mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +0 -4
  105. mteb/tasks/retrieval/eng/hotpot_qa_retrieval.py +57 -67
  106. mteb/tasks/retrieval/eng/legal_summarization_retrieval.py +1 -1
  107. mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +0 -3
  108. mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +0 -2
  109. mteb/tasks/retrieval/eng/oven_it2t_retrieval.py +1 -1
  110. mteb/tasks/retrieval/eng/quora_retrieval.py +51 -46
  111. mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +0 -4
  112. mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +0 -4
  113. mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +0 -2
  114. mteb/tasks/retrieval/jpn/ja_gov_faqs_retrieval.py +1 -1
  115. mteb/tasks/retrieval/multilingual/belebele_retrieval.py +1 -1
  116. mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +0 -2
  117. mteb/tasks/retrieval/multilingual/miracl_retrieval.py +1 -1
  118. mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +2 -9
  119. mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +0 -2
  120. mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +0 -2
  121. mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +6 -5
  122. mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +3 -4
  123. mteb/tasks/retrieval/nld/__init__.py +10 -0
  124. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +41 -0
  125. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +30 -0
  126. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +39 -0
  127. mteb/tasks/retrieval/nld/open_tender_retrieval.py +38 -0
  128. mteb/tasks/retrieval/nld/vabb_retrieval.py +41 -0
  129. mteb/tasks/retrieval/nob/norquad.py +2 -2
  130. mteb/tasks/retrieval/nob/snl_retrieval.py +2 -2
  131. mteb/tasks/retrieval/rus/__init__.py +11 -2
  132. mteb/tasks/retrieval/rus/ria_news_retrieval.py +48 -44
  133. mteb/tasks/retrieval/tur/tur_hist_quad.py +2 -2
  134. mteb/tasks/sts/__init__.py +1 -0
  135. mteb/tasks/sts/nld/__init__.py +5 -0
  136. mteb/tasks/sts/nld/sick_nl_sts.py +41 -0
  137. mteb-2.1.1.dist-info/METADATA +253 -0
  138. {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/RECORD +142 -95
  139. mteb/descriptive_stats/Classification/PersianTextTone.json +0 -56
  140. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchCount.json +0 -37
  141. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDepth.json +0 -25
  142. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchDistance.json +0 -25
  143. mteb/descriptive_stats/Image/Any2TextMutipleChoice/CVBenchRelation.json +0 -25
  144. mteb/descriptive_stats/Image/VisualSTS/STS12VisualSTS.json +0 -20
  145. mteb/descriptive_stats/Image/VisualSTS/STS13VisualSTS.json +0 -20
  146. mteb/descriptive_stats/Image/VisualSTS/STS14VisualSTS.json +0 -20
  147. mteb/descriptive_stats/Image/VisualSTS/STS15VisualSTS.json +0 -20
  148. mteb/descriptive_stats/Image/VisualSTS/STS16VisualSTS.json +0 -20
  149. mteb/descriptive_stats/Image/VisualSTS/STS17MultilingualVisualSTS.json +0 -220
  150. mteb/descriptive_stats/Image/VisualSTS/STSBenchmarkMultilingualVisualSTS.json +0 -402
  151. mteb/descriptive_stats/Reranking/InstructIR.json +0 -31
  152. mteb-2.0.5.dist-info/METADATA +0 -455
  153. {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/WHEEL +0 -0
  154. {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/entry_points.txt +0 -0
  155. {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/licenses/LICENSE +0 -0
  156. {mteb-2.0.5.dist-info → mteb-2.1.1.dist-info}/top_level.txt +0 -0
@@ -1,743 +1,1113 @@
1
1
  {
2
2
  "test": {
3
- "number_of_characters": 17009034,
4
3
  "num_samples": 390975,
5
- "num_queries": 261375,
6
- "num_documents": 129600,
7
- "min_document_length": 0,
8
- "average_document_length": 0,
9
- "max_document_length": 0,
10
- "unique_documents": 0,
11
- "num_document_images": 129600,
12
- "min_query_length": 9,
13
- "average_query_length": 65.0752137733142,
14
- "max_query_length": 532,
15
- "unique_queries": 259932,
16
- "num_query_images": 0,
17
- "min_relevant_docs_per_query": 1,
18
- "average_relevant_docs_per_query": 1.0,
19
- "max_relevant_docs_per_query": 1,
20
- "unique_relevant_docs": 129600,
4
+ "number_of_characters": 14903725,
5
+ "documents_text_statistics": null,
6
+ "documents_image_statistics": {
7
+ "min_image_width": 285,
8
+ "average_image_width": 618.7413888888889,
9
+ "max_image_width": 1092,
10
+ "min_image_height": 154,
11
+ "average_image_height": 506.8536111111111,
12
+ "max_image_height": 1076,
13
+ "unique_images": 3600
14
+ },
15
+ "queries_text_statistics": {
16
+ "total_text_length": 14903725,
17
+ "min_text_length": 1,
18
+ "average_text_length": 57.02046867527499,
19
+ "max_text_length": 524,
20
+ "unique_texts": 259928
21
+ },
22
+ "queries_image_statistics": null,
23
+ "relevant_docs_statistics": {
24
+ "num_relevant_docs": 261375,
25
+ "min_relevant_docs_per_query": 1,
26
+ "average_relevant_docs_per_query": 1.0,
27
+ "max_relevant_docs_per_query": 1,
28
+ "unique_relevant_docs": 129600
29
+ },
30
+ "top_ranked_statistics": null,
21
31
  "hf_subset_descriptive_stats": {
22
32
  "ar": {
23
- "number_of_characters": 310802,
24
33
  "num_samples": 10967,
25
- "num_queries": 7367,
26
- "num_documents": 3600,
27
- "min_document_length": 0,
28
- "average_document_length": 0,
29
- "max_document_length": 0,
30
- "unique_documents": 0,
31
- "num_document_images": 3600,
32
- "min_query_length": 8,
33
- "average_query_length": 42.18840776435455,
34
- "max_query_length": 208,
35
- "unique_queries": 7339,
36
- "num_query_images": 0,
37
- "min_relevant_docs_per_query": 1,
38
- "average_relevant_docs_per_query": 1.0,
39
- "max_relevant_docs_per_query": 1,
40
- "unique_relevant_docs": 3600
34
+ "number_of_characters": 310802,
35
+ "documents_text_statistics": null,
36
+ "documents_image_statistics": {
37
+ "min_image_width": 285,
38
+ "average_image_width": 618.7413888888889,
39
+ "max_image_width": 1092,
40
+ "min_image_height": 154,
41
+ "average_image_height": 506.8536111111111,
42
+ "max_image_height": 1076,
43
+ "unique_images": 3600
44
+ },
45
+ "queries_text_statistics": {
46
+ "total_text_length": 310802,
47
+ "min_text_length": 8,
48
+ "average_text_length": 42.18840776435455,
49
+ "max_text_length": 208,
50
+ "unique_texts": 7339
51
+ },
52
+ "queries_image_statistics": null,
53
+ "relevant_docs_statistics": {
54
+ "num_relevant_docs": 7367,
55
+ "min_relevant_docs_per_query": 1,
56
+ "average_relevant_docs_per_query": 1.0,
57
+ "max_relevant_docs_per_query": 1,
58
+ "unique_relevant_docs": 3600
59
+ },
60
+ "top_ranked_statistics": null
41
61
  },
42
62
  "bn": {
43
- "number_of_characters": 223622,
44
63
  "num_samples": 7200,
45
- "num_queries": 3600,
46
- "num_documents": 3600,
47
- "min_document_length": 0,
48
- "average_document_length": 0,
49
- "max_document_length": 0,
50
- "unique_documents": 0,
51
- "num_document_images": 3600,
52
- "min_query_length": 28,
53
- "average_query_length": 62.117222222222225,
54
- "max_query_length": 139,
55
- "unique_queries": 3594,
56
- "num_query_images": 0,
57
- "min_relevant_docs_per_query": 1,
58
- "average_relevant_docs_per_query": 1.0,
59
- "max_relevant_docs_per_query": 1,
60
- "unique_relevant_docs": 3600
64
+ "number_of_characters": 223622,
65
+ "documents_text_statistics": null,
66
+ "documents_image_statistics": {
67
+ "min_image_width": 285,
68
+ "average_image_width": 618.7413888888889,
69
+ "max_image_width": 1092,
70
+ "min_image_height": 154,
71
+ "average_image_height": 506.8536111111111,
72
+ "max_image_height": 1076,
73
+ "unique_images": 3600
74
+ },
75
+ "queries_text_statistics": {
76
+ "total_text_length": 223622,
77
+ "min_text_length": 28,
78
+ "average_text_length": 62.117222222222225,
79
+ "max_text_length": 139,
80
+ "unique_texts": 3594
81
+ },
82
+ "queries_image_statistics": null,
83
+ "relevant_docs_statistics": {
84
+ "num_relevant_docs": 3600,
85
+ "min_relevant_docs_per_query": 1,
86
+ "average_relevant_docs_per_query": 1.0,
87
+ "max_relevant_docs_per_query": 1,
88
+ "unique_relevant_docs": 3600
89
+ },
90
+ "top_ranked_statistics": null
61
91
  },
62
92
  "cs": {
63
- "number_of_characters": 282069,
64
93
  "num_samples": 10807,
65
- "num_queries": 7207,
66
- "num_documents": 3600,
67
- "min_document_length": 0,
68
- "average_document_length": 0,
69
- "max_document_length": 0,
70
- "unique_documents": 0,
71
- "num_document_images": 3600,
72
- "min_query_length": 3,
73
- "average_query_length": 39.13819897322048,
74
- "max_query_length": 266,
75
- "unique_queries": 6814,
76
- "num_query_images": 0,
77
- "min_relevant_docs_per_query": 1,
78
- "average_relevant_docs_per_query": 1.0,
79
- "max_relevant_docs_per_query": 1,
80
- "unique_relevant_docs": 3600
94
+ "number_of_characters": 282069,
95
+ "documents_text_statistics": null,
96
+ "documents_image_statistics": {
97
+ "min_image_width": 285,
98
+ "average_image_width": 618.7413888888889,
99
+ "max_image_width": 1092,
100
+ "min_image_height": 154,
101
+ "average_image_height": 506.8536111111111,
102
+ "max_image_height": 1076,
103
+ "unique_images": 3600
104
+ },
105
+ "queries_text_statistics": {
106
+ "total_text_length": 282069,
107
+ "min_text_length": 3,
108
+ "average_text_length": 39.13819897322048,
109
+ "max_text_length": 266,
110
+ "unique_texts": 6814
111
+ },
112
+ "queries_image_statistics": null,
113
+ "relevant_docs_statistics": {
114
+ "num_relevant_docs": 7207,
115
+ "min_relevant_docs_per_query": 1,
116
+ "average_relevant_docs_per_query": 1.0,
117
+ "max_relevant_docs_per_query": 1,
118
+ "unique_relevant_docs": 3600
119
+ },
120
+ "top_ranked_statistics": null
81
121
  },
82
122
  "da": {
83
- "number_of_characters": 351028,
84
123
  "num_samples": 10864,
85
- "num_queries": 7264,
86
- "num_documents": 3600,
87
- "min_document_length": 0,
88
- "average_document_length": 0,
89
- "max_document_length": 0,
90
- "unique_documents": 0,
91
- "num_document_images": 3600,
92
- "min_query_length": 7,
93
- "average_query_length": 48.32433920704846,
94
- "max_query_length": 158,
95
- "unique_queries": 7246,
96
- "num_query_images": 0,
97
- "min_relevant_docs_per_query": 1,
98
- "average_relevant_docs_per_query": 1.0,
99
- "max_relevant_docs_per_query": 1,
100
- "unique_relevant_docs": 3600
124
+ "number_of_characters": 351028,
125
+ "documents_text_statistics": null,
126
+ "documents_image_statistics": {
127
+ "min_image_width": 285,
128
+ "average_image_width": 618.7413888888889,
129
+ "max_image_width": 1092,
130
+ "min_image_height": 154,
131
+ "average_image_height": 506.8536111111111,
132
+ "max_image_height": 1076,
133
+ "unique_images": 3600
134
+ },
135
+ "queries_text_statistics": {
136
+ "total_text_length": 351028,
137
+ "min_text_length": 7,
138
+ "average_text_length": 48.32433920704846,
139
+ "max_text_length": 158,
140
+ "unique_texts": 7246
141
+ },
142
+ "queries_image_statistics": null,
143
+ "relevant_docs_statistics": {
144
+ "num_relevant_docs": 7264,
145
+ "min_relevant_docs_per_query": 1,
146
+ "average_relevant_docs_per_query": 1.0,
147
+ "max_relevant_docs_per_query": 1,
148
+ "unique_relevant_docs": 3600
149
+ },
150
+ "top_ranked_statistics": null
101
151
  },
102
152
  "de": {
103
- "number_of_characters": 660790,
104
153
  "num_samples": 12243,
105
- "num_queries": 8643,
106
- "num_documents": 3600,
107
- "min_document_length": 0,
108
- "average_document_length": 0,
109
- "max_document_length": 0,
110
- "unique_documents": 0,
111
- "num_document_images": 3600,
112
- "min_query_length": 11,
113
- "average_query_length": 76.45377762351036,
114
- "max_query_length": 334,
115
- "unique_queries": 8643,
116
- "num_query_images": 0,
117
- "min_relevant_docs_per_query": 1,
118
- "average_relevant_docs_per_query": 1.0,
119
- "max_relevant_docs_per_query": 1,
120
- "unique_relevant_docs": 3600
154
+ "number_of_characters": 660790,
155
+ "documents_text_statistics": null,
156
+ "documents_image_statistics": {
157
+ "min_image_width": 285,
158
+ "average_image_width": 618.7413888888889,
159
+ "max_image_width": 1092,
160
+ "min_image_height": 154,
161
+ "average_image_height": 506.8536111111111,
162
+ "max_image_height": 1076,
163
+ "unique_images": 3600
164
+ },
165
+ "queries_text_statistics": {
166
+ "total_text_length": 660790,
167
+ "min_text_length": 11,
168
+ "average_text_length": 76.45377762351036,
169
+ "max_text_length": 334,
170
+ "unique_texts": 8643
171
+ },
172
+ "queries_image_statistics": null,
173
+ "relevant_docs_statistics": {
174
+ "num_relevant_docs": 8643,
175
+ "min_relevant_docs_per_query": 1,
176
+ "average_relevant_docs_per_query": 1.0,
177
+ "max_relevant_docs_per_query": 1,
178
+ "unique_relevant_docs": 3600
179
+ },
180
+ "top_ranked_statistics": null
121
181
  },
122
182
  "el": {
123
- "number_of_characters": 370363,
124
183
  "num_samples": 10804,
125
- "num_queries": 7204,
126
- "num_documents": 3600,
127
- "min_document_length": 0,
128
- "average_document_length": 0,
129
- "max_document_length": 0,
130
- "unique_documents": 0,
131
- "num_document_images": 3600,
132
- "min_query_length": 4,
133
- "average_query_length": 51.410744031093834,
134
- "max_query_length": 262,
135
- "unique_queries": 7100,
136
- "num_query_images": 0,
137
- "min_relevant_docs_per_query": 1,
138
- "average_relevant_docs_per_query": 1.0,
139
- "max_relevant_docs_per_query": 1,
140
- "unique_relevant_docs": 3600
184
+ "number_of_characters": 370363,
185
+ "documents_text_statistics": null,
186
+ "documents_image_statistics": {
187
+ "min_image_width": 285,
188
+ "average_image_width": 618.7413888888889,
189
+ "max_image_width": 1092,
190
+ "min_image_height": 154,
191
+ "average_image_height": 506.8536111111111,
192
+ "max_image_height": 1076,
193
+ "unique_images": 3600
194
+ },
195
+ "queries_text_statistics": {
196
+ "total_text_length": 370363,
197
+ "min_text_length": 4,
198
+ "average_text_length": 51.410744031093834,
199
+ "max_text_length": 262,
200
+ "unique_texts": 7100
201
+ },
202
+ "queries_image_statistics": null,
203
+ "relevant_docs_statistics": {
204
+ "num_relevant_docs": 7204,
205
+ "min_relevant_docs_per_query": 1,
206
+ "average_relevant_docs_per_query": 1.0,
207
+ "max_relevant_docs_per_query": 1,
208
+ "unique_relevant_docs": 3600
209
+ },
210
+ "top_ranked_statistics": null
141
211
  },
142
212
  "en": {
143
- "number_of_characters": 356488,
144
213
  "num_samples": 10800,
145
- "num_queries": 7200,
146
- "num_documents": 3600,
147
- "min_document_length": 0,
148
- "average_document_length": 0,
149
- "max_document_length": 0,
150
- "unique_documents": 0,
151
- "num_document_images": 3600,
152
- "min_query_length": 6,
153
- "average_query_length": 49.51222222222222,
154
- "max_query_length": 148,
155
- "unique_queries": 7129,
156
- "num_query_images": 0,
157
- "min_relevant_docs_per_query": 1,
158
- "average_relevant_docs_per_query": 1.0,
159
- "max_relevant_docs_per_query": 1,
160
- "unique_relevant_docs": 3600
214
+ "number_of_characters": 356488,
215
+ "documents_text_statistics": null,
216
+ "documents_image_statistics": {
217
+ "min_image_width": 285,
218
+ "average_image_width": 618.7413888888889,
219
+ "max_image_width": 1092,
220
+ "min_image_height": 154,
221
+ "average_image_height": 506.8536111111111,
222
+ "max_image_height": 1076,
223
+ "unique_images": 3600
224
+ },
225
+ "queries_text_statistics": {
226
+ "total_text_length": 356488,
227
+ "min_text_length": 6,
228
+ "average_text_length": 49.51222222222222,
229
+ "max_text_length": 148,
230
+ "unique_texts": 7129
231
+ },
232
+ "queries_image_statistics": null,
233
+ "relevant_docs_statistics": {
234
+ "num_relevant_docs": 7200,
235
+ "min_relevant_docs_per_query": 1,
236
+ "average_relevant_docs_per_query": 1.0,
237
+ "max_relevant_docs_per_query": 1,
238
+ "unique_relevant_docs": 3600
239
+ },
240
+ "top_ranked_statistics": null
161
241
  },
162
242
  "es": {
163
- "number_of_characters": 485004,
164
243
  "num_samples": 12214,
165
- "num_queries": 8614,
166
- "num_documents": 3600,
167
- "min_document_length": 0,
168
- "average_document_length": 0,
169
- "max_document_length": 0,
170
- "unique_documents": 0,
171
- "num_document_images": 3600,
172
- "min_query_length": 12,
173
- "average_query_length": 56.30415602507546,
174
- "max_query_length": 179,
175
- "unique_queries": 8605,
176
- "num_query_images": 0,
177
- "min_relevant_docs_per_query": 1,
178
- "average_relevant_docs_per_query": 1.0,
179
- "max_relevant_docs_per_query": 1,
180
- "unique_relevant_docs": 3600
244
+ "number_of_characters": 485004,
245
+ "documents_text_statistics": null,
246
+ "documents_image_statistics": {
247
+ "min_image_width": 285,
248
+ "average_image_width": 618.7413888888889,
249
+ "max_image_width": 1092,
250
+ "min_image_height": 154,
251
+ "average_image_height": 506.8536111111111,
252
+ "max_image_height": 1076,
253
+ "unique_images": 3600
254
+ },
255
+ "queries_text_statistics": {
256
+ "total_text_length": 485004,
257
+ "min_text_length": 12,
258
+ "average_text_length": 56.30415602507546,
259
+ "max_text_length": 179,
260
+ "unique_texts": 8605
261
+ },
262
+ "queries_image_statistics": null,
263
+ "relevant_docs_statistics": {
264
+ "num_relevant_docs": 8614,
265
+ "min_relevant_docs_per_query": 1,
266
+ "average_relevant_docs_per_query": 1.0,
267
+ "max_relevant_docs_per_query": 1,
268
+ "unique_relevant_docs": 3600
269
+ },
270
+ "top_ranked_statistics": null
181
271
  },
182
272
  "fa": {
183
- "number_of_characters": 430055,
184
273
  "num_samples": 10845,
185
- "num_queries": 7245,
186
- "num_documents": 3600,
187
- "min_document_length": 0,
188
- "average_document_length": 0,
189
- "max_document_length": 0,
190
- "unique_documents": 0,
191
- "num_document_images": 3600,
192
- "min_query_length": 4,
193
- "average_query_length": 59.35886818495514,
194
- "max_query_length": 289,
195
- "unique_queries": 7242,
196
- "num_query_images": 0,
197
- "min_relevant_docs_per_query": 1,
198
- "average_relevant_docs_per_query": 1.0,
199
- "max_relevant_docs_per_query": 1,
200
- "unique_relevant_docs": 3600
274
+ "number_of_characters": 430055,
275
+ "documents_text_statistics": null,
276
+ "documents_image_statistics": {
277
+ "min_image_width": 285,
278
+ "average_image_width": 618.7413888888889,
279
+ "max_image_width": 1092,
280
+ "min_image_height": 154,
281
+ "average_image_height": 506.8536111111111,
282
+ "max_image_height": 1076,
283
+ "unique_images": 3600
284
+ },
285
+ "queries_text_statistics": {
286
+ "total_text_length": 430055,
287
+ "min_text_length": 4,
288
+ "average_text_length": 59.35886818495514,
289
+ "max_text_length": 289,
290
+ "unique_texts": 7242
291
+ },
292
+ "queries_image_statistics": null,
293
+ "relevant_docs_statistics": {
294
+ "num_relevant_docs": 7245,
295
+ "min_relevant_docs_per_query": 1,
296
+ "average_relevant_docs_per_query": 1.0,
297
+ "max_relevant_docs_per_query": 1,
298
+ "unique_relevant_docs": 3600
299
+ },
300
+ "top_ranked_statistics": null
201
301
  },
202
302
  "fi": {
203
- "number_of_characters": 464334,
204
303
  "num_samples": 10727,
205
- "num_queries": 7127,
206
- "num_documents": 3600,
207
- "min_document_length": 0,
208
- "average_document_length": 0,
209
- "max_document_length": 0,
210
- "unique_documents": 0,
211
- "num_document_images": 3600,
212
- "min_query_length": 1,
213
- "average_query_length": 65.15139609934053,
214
- "max_query_length": 336,
215
- "unique_queries": 7110,
216
- "num_query_images": 0,
217
- "min_relevant_docs_per_query": 1,
218
- "average_relevant_docs_per_query": 1.0,
219
- "max_relevant_docs_per_query": 1,
220
- "unique_relevant_docs": 3600
304
+ "number_of_characters": 464334,
305
+ "documents_text_statistics": null,
306
+ "documents_image_statistics": {
307
+ "min_image_width": 285,
308
+ "average_image_width": 618.7413888888889,
309
+ "max_image_width": 1092,
310
+ "min_image_height": 154,
311
+ "average_image_height": 506.8536111111111,
312
+ "max_image_height": 1076,
313
+ "unique_images": 3600
314
+ },
315
+ "queries_text_statistics": {
316
+ "total_text_length": 464334,
317
+ "min_text_length": 1,
318
+ "average_text_length": 65.15139609934053,
319
+ "max_text_length": 336,
320
+ "unique_texts": 7110
321
+ },
322
+ "queries_image_statistics": null,
323
+ "relevant_docs_statistics": {
324
+ "num_relevant_docs": 7127,
325
+ "min_relevant_docs_per_query": 1,
326
+ "average_relevant_docs_per_query": 1.0,
327
+ "max_relevant_docs_per_query": 1,
328
+ "unique_relevant_docs": 3600
329
+ },
330
+ "top_ranked_statistics": null
221
331
  },
222
332
  "fil": {
223
- "number_of_characters": 480287,
224
333
  "num_samples": 10709,
225
- "num_queries": 7109,
226
- "num_documents": 3600,
227
- "min_document_length": 0,
228
- "average_document_length": 0,
229
- "max_document_length": 0,
230
- "unique_documents": 0,
231
- "num_document_images": 3600,
232
- "min_query_length": 4,
233
- "average_query_length": 67.56041637361092,
234
- "max_query_length": 332,
235
- "unique_queries": 7016,
236
- "num_query_images": 0,
237
- "min_relevant_docs_per_query": 1,
238
- "average_relevant_docs_per_query": 1.0,
239
- "max_relevant_docs_per_query": 1,
240
- "unique_relevant_docs": 3600
334
+ "number_of_characters": 480287,
335
+ "documents_text_statistics": null,
336
+ "documents_image_statistics": {
337
+ "min_image_width": 285,
338
+ "average_image_width": 618.7413888888889,
339
+ "max_image_width": 1092,
340
+ "min_image_height": 154,
341
+ "average_image_height": 506.8536111111111,
342
+ "max_image_height": 1076,
343
+ "unique_images": 3600
344
+ },
345
+ "queries_text_statistics": {
346
+ "total_text_length": 480287,
347
+ "min_text_length": 4,
348
+ "average_text_length": 67.56041637361092,
349
+ "max_text_length": 332,
350
+ "unique_texts": 7016
351
+ },
352
+ "queries_image_statistics": null,
353
+ "relevant_docs_statistics": {
354
+ "num_relevant_docs": 7109,
355
+ "min_relevant_docs_per_query": 1,
356
+ "average_relevant_docs_per_query": 1.0,
357
+ "max_relevant_docs_per_query": 1,
358
+ "unique_relevant_docs": 3600
359
+ },
360
+ "top_ranked_statistics": null
241
361
  },
242
362
  "fr": {
243
- "number_of_characters": 595836,
244
363
  "num_samples": 12162,
245
- "num_queries": 8562,
246
- "num_documents": 3600,
247
- "min_document_length": 0,
248
- "average_document_length": 0,
249
- "max_document_length": 0,
250
- "unique_documents": 0,
251
- "num_document_images": 3600,
252
- "min_query_length": 15,
253
- "average_query_length": 69.59074982480729,
254
- "max_query_length": 173,
255
- "unique_queries": 8560,
256
- "num_query_images": 0,
257
- "min_relevant_docs_per_query": 1,
258
- "average_relevant_docs_per_query": 1.0,
259
- "max_relevant_docs_per_query": 1,
260
- "unique_relevant_docs": 3600
364
+ "number_of_characters": 595836,
365
+ "documents_text_statistics": null,
366
+ "documents_image_statistics": {
367
+ "min_image_width": 285,
368
+ "average_image_width": 618.7413888888889,
369
+ "max_image_width": 1092,
370
+ "min_image_height": 154,
371
+ "average_image_height": 506.8536111111111,
372
+ "max_image_height": 1076,
373
+ "unique_images": 3600
374
+ },
375
+ "queries_text_statistics": {
376
+ "total_text_length": 595836,
377
+ "min_text_length": 15,
378
+ "average_text_length": 69.59074982480729,
379
+ "max_text_length": 173,
380
+ "unique_texts": 8560
381
+ },
382
+ "queries_image_statistics": null,
383
+ "relevant_docs_statistics": {
384
+ "num_relevant_docs": 8562,
385
+ "min_relevant_docs_per_query": 1,
386
+ "average_relevant_docs_per_query": 1.0,
387
+ "max_relevant_docs_per_query": 1,
388
+ "unique_relevant_docs": 3600
389
+ },
390
+ "top_ranked_statistics": null
261
391
  },
262
392
  "he": {
263
- "number_of_characters": 457775,
264
393
  "num_samples": 10800,
265
- "num_queries": 7200,
266
- "num_documents": 3600,
267
- "min_document_length": 0,
268
- "average_document_length": 0,
269
- "max_document_length": 0,
270
- "unique_documents": 0,
271
- "num_document_images": 3600,
272
- "min_query_length": 3,
273
- "average_query_length": 63.579861111111114,
274
- "max_query_length": 453,
275
- "unique_queries": 7190,
276
- "num_query_images": 0,
277
- "min_relevant_docs_per_query": 1,
278
- "average_relevant_docs_per_query": 1.0,
279
- "max_relevant_docs_per_query": 1,
280
- "unique_relevant_docs": 3600
394
+ "number_of_characters": 457775,
395
+ "documents_text_statistics": null,
396
+ "documents_image_statistics": {
397
+ "min_image_width": 285,
398
+ "average_image_width": 618.7413888888889,
399
+ "max_image_width": 1092,
400
+ "min_image_height": 154,
401
+ "average_image_height": 506.8536111111111,
402
+ "max_image_height": 1076,
403
+ "unique_images": 3600
404
+ },
405
+ "queries_text_statistics": {
406
+ "total_text_length": 457775,
407
+ "min_text_length": 3,
408
+ "average_text_length": 63.579861111111114,
409
+ "max_text_length": 453,
410
+ "unique_texts": 7190
411
+ },
412
+ "queries_image_statistics": null,
413
+ "relevant_docs_statistics": {
414
+ "num_relevant_docs": 7200,
415
+ "min_relevant_docs_per_query": 1,
416
+ "average_relevant_docs_per_query": 1.0,
417
+ "max_relevant_docs_per_query": 1,
418
+ "unique_relevant_docs": 3600
419
+ },
420
+ "top_ranked_statistics": null
281
421
  },
282
422
  "hi": {
283
- "number_of_characters": 509092,
284
423
  "num_samples": 12103,
285
- "num_queries": 8503,
286
- "num_documents": 3600,
287
- "min_document_length": 0,
288
- "average_document_length": 0,
289
- "max_document_length": 0,
290
- "unique_documents": 0,
291
- "num_document_images": 3600,
292
- "min_query_length": 3,
293
- "average_query_length": 59.87204516053158,
294
- "max_query_length": 188,
295
- "unique_queries": 8422,
296
- "num_query_images": 0,
297
- "min_relevant_docs_per_query": 1,
298
- "average_relevant_docs_per_query": 1.0,
299
- "max_relevant_docs_per_query": 1,
300
- "unique_relevant_docs": 3600
424
+ "number_of_characters": 509092,
425
+ "documents_text_statistics": null,
426
+ "documents_image_statistics": {
427
+ "min_image_width": 285,
428
+ "average_image_width": 618.7413888888889,
429
+ "max_image_width": 1092,
430
+ "min_image_height": 154,
431
+ "average_image_height": 506.8536111111111,
432
+ "max_image_height": 1076,
433
+ "unique_images": 3600
434
+ },
435
+ "queries_text_statistics": {
436
+ "total_text_length": 509092,
437
+ "min_text_length": 3,
438
+ "average_text_length": 59.87204516053158,
439
+ "max_text_length": 188,
440
+ "unique_texts": 8422
441
+ },
442
+ "queries_image_statistics": null,
443
+ "relevant_docs_statistics": {
444
+ "num_relevant_docs": 8503,
445
+ "min_relevant_docs_per_query": 1,
446
+ "average_relevant_docs_per_query": 1.0,
447
+ "max_relevant_docs_per_query": 1,
448
+ "unique_relevant_docs": 3600
449
+ },
450
+ "top_ranked_statistics": null
301
451
  },
302
452
  "hr": {
303
- "number_of_characters": 420595,
304
453
  "num_samples": 10880,
305
- "num_queries": 7280,
306
- "num_documents": 3600,
307
- "min_document_length": 0,
308
- "average_document_length": 0,
309
- "max_document_length": 0,
310
- "unique_documents": 0,
311
- "num_document_images": 3600,
312
- "min_query_length": 3,
313
- "average_query_length": 57.77403846153846,
314
- "max_query_length": 271,
315
- "unique_queries": 7224,
316
- "num_query_images": 0,
317
- "min_relevant_docs_per_query": 1,
318
- "average_relevant_docs_per_query": 1.0,
319
- "max_relevant_docs_per_query": 1,
320
- "unique_relevant_docs": 3600
454
+ "number_of_characters": 420595,
455
+ "documents_text_statistics": null,
456
+ "documents_image_statistics": {
457
+ "min_image_width": 285,
458
+ "average_image_width": 618.7413888888889,
459
+ "max_image_width": 1092,
460
+ "min_image_height": 154,
461
+ "average_image_height": 506.8536111111111,
462
+ "max_image_height": 1076,
463
+ "unique_images": 3600
464
+ },
465
+ "queries_text_statistics": {
466
+ "total_text_length": 420595,
467
+ "min_text_length": 3,
468
+ "average_text_length": 57.77403846153846,
469
+ "max_text_length": 271,
470
+ "unique_texts": 7224
471
+ },
472
+ "queries_image_statistics": null,
473
+ "relevant_docs_statistics": {
474
+ "num_relevant_docs": 7280,
475
+ "min_relevant_docs_per_query": 1,
476
+ "average_relevant_docs_per_query": 1.0,
477
+ "max_relevant_docs_per_query": 1,
478
+ "unique_relevant_docs": 3600
479
+ },
480
+ "top_ranked_statistics": null
321
481
  },
322
482
  "hu": {
323
- "number_of_characters": 436677,
324
483
  "num_samples": 10816,
325
- "num_queries": 7216,
326
- "num_documents": 3600,
327
- "min_document_length": 0,
328
- "average_document_length": 0,
329
- "max_document_length": 0,
330
- "unique_documents": 0,
331
- "num_document_images": 3600,
332
- "min_query_length": 5,
333
- "average_query_length": 60.51510532150776,
334
- "max_query_length": 393,
335
- "unique_queries": 7209,
336
- "num_query_images": 0,
337
- "min_relevant_docs_per_query": 1,
338
- "average_relevant_docs_per_query": 1.0,
339
- "max_relevant_docs_per_query": 1,
340
- "unique_relevant_docs": 3600
484
+ "number_of_characters": 436677,
485
+ "documents_text_statistics": null,
486
+ "documents_image_statistics": {
487
+ "min_image_width": 285,
488
+ "average_image_width": 618.7413888888889,
489
+ "max_image_width": 1092,
490
+ "min_image_height": 154,
491
+ "average_image_height": 506.8536111111111,
492
+ "max_image_height": 1076,
493
+ "unique_images": 3600
494
+ },
495
+ "queries_text_statistics": {
496
+ "total_text_length": 436677,
497
+ "min_text_length": 5,
498
+ "average_text_length": 60.51510532150776,
499
+ "max_text_length": 393,
500
+ "unique_texts": 7209
501
+ },
502
+ "queries_image_statistics": null,
503
+ "relevant_docs_statistics": {
504
+ "num_relevant_docs": 7216,
505
+ "min_relevant_docs_per_query": 1,
506
+ "average_relevant_docs_per_query": 1.0,
507
+ "max_relevant_docs_per_query": 1,
508
+ "unique_relevant_docs": 3600
509
+ },
510
+ "top_ranked_statistics": null
341
511
  },
342
512
  "id": {
343
- "number_of_characters": 666387,
344
513
  "num_samples": 10726,
345
- "num_queries": 7126,
346
- "num_documents": 3600,
347
- "min_document_length": 0,
348
- "average_document_length": 0,
349
- "max_document_length": 0,
350
- "unique_documents": 0,
351
- "num_document_images": 3600,
352
- "min_query_length": 16,
353
- "average_query_length": 93.5148751052484,
354
- "max_query_length": 286,
355
- "unique_queries": 7125,
356
- "num_query_images": 0,
357
- "min_relevant_docs_per_query": 1,
358
- "average_relevant_docs_per_query": 1.0,
359
- "max_relevant_docs_per_query": 1,
360
- "unique_relevant_docs": 3600
514
+ "number_of_characters": 666387,
515
+ "documents_text_statistics": null,
516
+ "documents_image_statistics": {
517
+ "min_image_width": 285,
518
+ "average_image_width": 618.7413888888889,
519
+ "max_image_width": 1092,
520
+ "min_image_height": 154,
521
+ "average_image_height": 506.8536111111111,
522
+ "max_image_height": 1076,
523
+ "unique_images": 3600
524
+ },
525
+ "queries_text_statistics": {
526
+ "total_text_length": 666387,
527
+ "min_text_length": 16,
528
+ "average_text_length": 93.5148751052484,
529
+ "max_text_length": 286,
530
+ "unique_texts": 7125
531
+ },
532
+ "queries_image_statistics": null,
533
+ "relevant_docs_statistics": {
534
+ "num_relevant_docs": 7126,
535
+ "min_relevant_docs_per_query": 1,
536
+ "average_relevant_docs_per_query": 1.0,
537
+ "max_relevant_docs_per_query": 1,
538
+ "unique_relevant_docs": 3600
539
+ },
540
+ "top_ranked_statistics": null
361
541
  },
362
542
  "it": {
363
- "number_of_characters": 608604,
364
543
  "num_samples": 12071,
365
- "num_queries": 8471,
366
- "num_documents": 3600,
367
- "min_document_length": 0,
368
- "average_document_length": 0,
369
- "max_document_length": 0,
370
- "unique_documents": 0,
371
- "num_document_images": 3600,
372
- "min_query_length": 15,
373
- "average_query_length": 71.84559083933419,
374
- "max_query_length": 201,
375
- "unique_queries": 8470,
376
- "num_query_images": 0,
377
- "min_relevant_docs_per_query": 1,
378
- "average_relevant_docs_per_query": 1.0,
379
- "max_relevant_docs_per_query": 1,
380
- "unique_relevant_docs": 3600
544
+ "number_of_characters": 608604,
545
+ "documents_text_statistics": null,
546
+ "documents_image_statistics": {
547
+ "min_image_width": 285,
548
+ "average_image_width": 618.7413888888889,
549
+ "max_image_width": 1092,
550
+ "min_image_height": 154,
551
+ "average_image_height": 506.8536111111111,
552
+ "max_image_height": 1076,
553
+ "unique_images": 3600
554
+ },
555
+ "queries_text_statistics": {
556
+ "total_text_length": 608604,
557
+ "min_text_length": 15,
558
+ "average_text_length": 71.84559083933419,
559
+ "max_text_length": 201,
560
+ "unique_texts": 8470
561
+ },
562
+ "queries_image_statistics": null,
563
+ "relevant_docs_statistics": {
564
+ "num_relevant_docs": 8471,
565
+ "min_relevant_docs_per_query": 1,
566
+ "average_relevant_docs_per_query": 1.0,
567
+ "max_relevant_docs_per_query": 1,
568
+ "unique_relevant_docs": 3600
569
+ },
570
+ "top_ranked_statistics": null
381
571
  },
382
572
  "ja": {
383
- "number_of_characters": 186672,
384
573
  "num_samples": 10785,
385
- "num_queries": 7185,
386
- "num_documents": 3600,
387
- "min_document_length": 0,
388
- "average_document_length": 0,
389
- "max_document_length": 0,
390
- "unique_documents": 0,
391
- "num_document_images": 3600,
392
- "min_query_length": 1,
393
- "average_query_length": 25.980793319415447,
394
- "max_query_length": 97,
395
- "unique_queries": 7175,
396
- "num_query_images": 0,
397
- "min_relevant_docs_per_query": 1,
398
- "average_relevant_docs_per_query": 1.0,
399
- "max_relevant_docs_per_query": 1,
400
- "unique_relevant_docs": 3600
574
+ "number_of_characters": 186672,
575
+ "documents_text_statistics": null,
576
+ "documents_image_statistics": {
577
+ "min_image_width": 285,
578
+ "average_image_width": 618.7413888888889,
579
+ "max_image_width": 1092,
580
+ "min_image_height": 154,
581
+ "average_image_height": 506.8536111111111,
582
+ "max_image_height": 1076,
583
+ "unique_images": 3600
584
+ },
585
+ "queries_text_statistics": {
586
+ "total_text_length": 186672,
587
+ "min_text_length": 1,
588
+ "average_text_length": 25.980793319415447,
589
+ "max_text_length": 97,
590
+ "unique_texts": 7175
591
+ },
592
+ "queries_image_statistics": null,
593
+ "relevant_docs_statistics": {
594
+ "num_relevant_docs": 7185,
595
+ "min_relevant_docs_per_query": 1,
596
+ "average_relevant_docs_per_query": 1.0,
597
+ "max_relevant_docs_per_query": 1,
598
+ "unique_relevant_docs": 3600
599
+ },
600
+ "top_ranked_statistics": null
401
601
  },
402
602
  "ko": {
403
- "number_of_characters": 188812,
404
603
  "num_samples": 11250,
405
- "num_queries": 7650,
406
- "num_documents": 3600,
407
- "min_document_length": 0,
408
- "average_document_length": 0,
409
- "max_document_length": 0,
410
- "unique_documents": 0,
411
- "num_document_images": 3600,
412
- "min_query_length": 1,
413
- "average_query_length": 24.681307189542483,
414
- "max_query_length": 113,
415
- "unique_queries": 7644,
416
- "num_query_images": 0,
417
- "min_relevant_docs_per_query": 1,
418
- "average_relevant_docs_per_query": 1.0,
419
- "max_relevant_docs_per_query": 1,
420
- "unique_relevant_docs": 3600
604
+ "number_of_characters": 188812,
605
+ "documents_text_statistics": null,
606
+ "documents_image_statistics": {
607
+ "min_image_width": 285,
608
+ "average_image_width": 618.7413888888889,
609
+ "max_image_width": 1092,
610
+ "min_image_height": 154,
611
+ "average_image_height": 506.8536111111111,
612
+ "max_image_height": 1076,
613
+ "unique_images": 3600
614
+ },
615
+ "queries_text_statistics": {
616
+ "total_text_length": 188812,
617
+ "min_text_length": 1,
618
+ "average_text_length": 24.681307189542483,
619
+ "max_text_length": 113,
620
+ "unique_texts": 7644
621
+ },
622
+ "queries_image_statistics": null,
623
+ "relevant_docs_statistics": {
624
+ "num_relevant_docs": 7650,
625
+ "min_relevant_docs_per_query": 1,
626
+ "average_relevant_docs_per_query": 1.0,
627
+ "max_relevant_docs_per_query": 1,
628
+ "unique_relevant_docs": 3600
629
+ },
630
+ "top_ranked_statistics": null
421
631
  },
422
632
  "mi": {
423
- "number_of_characters": 262800,
424
633
  "num_samples": 8332,
425
- "num_queries": 4732,
426
- "num_documents": 3600,
427
- "min_document_length": 0,
428
- "average_document_length": 0,
429
- "max_document_length": 0,
430
- "unique_documents": 0,
431
- "num_document_images": 3600,
432
- "min_query_length": 7,
433
- "average_query_length": 55.53677092138631,
434
- "max_query_length": 304,
435
- "unique_queries": 4707,
436
- "num_query_images": 0,
437
- "min_relevant_docs_per_query": 1,
438
- "average_relevant_docs_per_query": 1.0,
439
- "max_relevant_docs_per_query": 1,
440
- "unique_relevant_docs": 3600
634
+ "number_of_characters": 262800,
635
+ "documents_text_statistics": null,
636
+ "documents_image_statistics": {
637
+ "min_image_width": 285,
638
+ "average_image_width": 618.7413888888889,
639
+ "max_image_width": 1092,
640
+ "min_image_height": 154,
641
+ "average_image_height": 506.8536111111111,
642
+ "max_image_height": 1076,
643
+ "unique_images": 3600
644
+ },
645
+ "queries_text_statistics": {
646
+ "total_text_length": 262800,
647
+ "min_text_length": 7,
648
+ "average_text_length": 55.53677092138631,
649
+ "max_text_length": 304,
650
+ "unique_texts": 4707
651
+ },
652
+ "queries_image_statistics": null,
653
+ "relevant_docs_statistics": {
654
+ "num_relevant_docs": 4732,
655
+ "min_relevant_docs_per_query": 1,
656
+ "average_relevant_docs_per_query": 1.0,
657
+ "max_relevant_docs_per_query": 1,
658
+ "unique_relevant_docs": 3600
659
+ },
660
+ "top_ranked_statistics": null
441
661
  },
442
662
  "nl": {
443
- "number_of_characters": 370231,
444
663
  "num_samples": 11659,
445
- "num_queries": 8059,
446
- "num_documents": 3600,
447
- "min_document_length": 0,
448
- "average_document_length": 0,
449
- "max_document_length": 0,
450
- "unique_documents": 0,
451
- "num_document_images": 3600,
452
- "min_query_length": 4,
453
- "average_query_length": 45.94006700583199,
454
- "max_query_length": 173,
455
- "unique_queries": 8004,
456
- "num_query_images": 0,
457
- "min_relevant_docs_per_query": 1,
458
- "average_relevant_docs_per_query": 1.0,
459
- "max_relevant_docs_per_query": 1,
460
- "unique_relevant_docs": 3600
664
+ "number_of_characters": 370231,
665
+ "documents_text_statistics": null,
666
+ "documents_image_statistics": {
667
+ "min_image_width": 285,
668
+ "average_image_width": 618.7413888888889,
669
+ "max_image_width": 1092,
670
+ "min_image_height": 154,
671
+ "average_image_height": 506.8536111111111,
672
+ "max_image_height": 1076,
673
+ "unique_images": 3600
674
+ },
675
+ "queries_text_statistics": {
676
+ "total_text_length": 370231,
677
+ "min_text_length": 4,
678
+ "average_text_length": 45.94006700583199,
679
+ "max_text_length": 173,
680
+ "unique_texts": 8004
681
+ },
682
+ "queries_image_statistics": null,
683
+ "relevant_docs_statistics": {
684
+ "num_relevant_docs": 8059,
685
+ "min_relevant_docs_per_query": 1,
686
+ "average_relevant_docs_per_query": 1.0,
687
+ "max_relevant_docs_per_query": 1,
688
+ "unique_relevant_docs": 3600
689
+ },
690
+ "top_ranked_statistics": null
461
691
  },
462
692
  "no": {
463
- "number_of_characters": 391381,
464
693
  "num_samples": 10813,
465
- "num_queries": 7213,
466
- "num_documents": 3600,
467
- "min_document_length": 0,
468
- "average_document_length": 0,
469
- "max_document_length": 0,
470
- "unique_documents": 0,
471
- "num_document_images": 3600,
472
- "min_query_length": 5,
473
- "average_query_length": 54.26050187162068,
474
- "max_query_length": 162,
475
- "unique_queries": 7191,
476
- "num_query_images": 0,
477
- "min_relevant_docs_per_query": 1,
478
- "average_relevant_docs_per_query": 1.0,
479
- "max_relevant_docs_per_query": 1,
480
- "unique_relevant_docs": 3600
694
+ "number_of_characters": 391381,
695
+ "documents_text_statistics": null,
696
+ "documents_image_statistics": {
697
+ "min_image_width": 285,
698
+ "average_image_width": 618.7413888888889,
699
+ "max_image_width": 1092,
700
+ "min_image_height": 154,
701
+ "average_image_height": 506.8536111111111,
702
+ "max_image_height": 1076,
703
+ "unique_images": 3600
704
+ },
705
+ "queries_text_statistics": {
706
+ "total_text_length": 391381,
707
+ "min_text_length": 5,
708
+ "average_text_length": 54.26050187162068,
709
+ "max_text_length": 162,
710
+ "unique_texts": 7191
711
+ },
712
+ "queries_image_statistics": null,
713
+ "relevant_docs_statistics": {
714
+ "num_relevant_docs": 7213,
715
+ "min_relevant_docs_per_query": 1,
716
+ "average_relevant_docs_per_query": 1.0,
717
+ "max_relevant_docs_per_query": 1,
718
+ "unique_relevant_docs": 3600
719
+ },
720
+ "top_ranked_statistics": null
481
721
  },
482
722
  "pl": {
483
- "number_of_characters": 411189,
484
723
  "num_samples": 10741,
485
- "num_queries": 7141,
486
- "num_documents": 3600,
487
- "min_document_length": 0,
488
- "average_document_length": 0,
489
- "max_document_length": 0,
490
- "unique_documents": 0,
491
- "num_document_images": 3600,
492
- "min_query_length": 4,
493
- "average_query_length": 57.58143117210475,
494
- "max_query_length": 226,
495
- "unique_queries": 7117,
496
- "num_query_images": 0,
497
- "min_relevant_docs_per_query": 1,
498
- "average_relevant_docs_per_query": 1.0,
499
- "max_relevant_docs_per_query": 1,
500
- "unique_relevant_docs": 3600
724
+ "number_of_characters": 411189,
725
+ "documents_text_statistics": null,
726
+ "documents_image_statistics": {
727
+ "min_image_width": 285,
728
+ "average_image_width": 618.7413888888889,
729
+ "max_image_width": 1092,
730
+ "min_image_height": 154,
731
+ "average_image_height": 506.8536111111111,
732
+ "max_image_height": 1076,
733
+ "unique_images": 3600
734
+ },
735
+ "queries_text_statistics": {
736
+ "total_text_length": 411189,
737
+ "min_text_length": 4,
738
+ "average_text_length": 57.58143117210475,
739
+ "max_text_length": 226,
740
+ "unique_texts": 7117
741
+ },
742
+ "queries_image_statistics": null,
743
+ "relevant_docs_statistics": {
744
+ "num_relevant_docs": 7141,
745
+ "min_relevant_docs_per_query": 1,
746
+ "average_relevant_docs_per_query": 1.0,
747
+ "max_relevant_docs_per_query": 1,
748
+ "unique_relevant_docs": 3600
749
+ },
750
+ "top_ranked_statistics": null
501
751
  },
502
752
  "pt": {
503
- "number_of_characters": 446873,
504
753
  "num_samples": 10843,
505
- "num_queries": 7243,
506
- "num_documents": 3600,
507
- "min_document_length": 0,
508
- "average_document_length": 0,
509
- "max_document_length": 0,
510
- "unique_documents": 0,
511
- "num_document_images": 3600,
512
- "min_query_length": 10,
513
- "average_query_length": 61.697224906806575,
514
- "max_query_length": 324,
515
- "unique_queries": 7220,
516
- "num_query_images": 0,
517
- "min_relevant_docs_per_query": 1,
518
- "average_relevant_docs_per_query": 1.0,
519
- "max_relevant_docs_per_query": 1,
520
- "unique_relevant_docs": 3600
754
+ "number_of_characters": 446873,
755
+ "documents_text_statistics": null,
756
+ "documents_image_statistics": {
757
+ "min_image_width": 285,
758
+ "average_image_width": 618.7413888888889,
759
+ "max_image_width": 1092,
760
+ "min_image_height": 154,
761
+ "average_image_height": 506.8536111111111,
762
+ "max_image_height": 1076,
763
+ "unique_images": 3600
764
+ },
765
+ "queries_text_statistics": {
766
+ "total_text_length": 446873,
767
+ "min_text_length": 10,
768
+ "average_text_length": 61.697224906806575,
769
+ "max_text_length": 324,
770
+ "unique_texts": 7220
771
+ },
772
+ "queries_image_statistics": null,
773
+ "relevant_docs_statistics": {
774
+ "num_relevant_docs": 7243,
775
+ "min_relevant_docs_per_query": 1,
776
+ "average_relevant_docs_per_query": 1.0,
777
+ "max_relevant_docs_per_query": 1,
778
+ "unique_relevant_docs": 3600
779
+ },
780
+ "top_ranked_statistics": null
521
781
  },
522
782
  "quz": {
523
- "number_of_characters": 278263,
524
783
  "num_samples": 10800,
525
- "num_queries": 7200,
526
- "num_documents": 3600,
527
- "min_document_length": 0,
528
- "average_document_length": 0,
529
- "max_document_length": 0,
530
- "unique_documents": 0,
531
- "num_document_images": 3600,
532
- "min_query_length": 2,
533
- "average_query_length": 38.64763888888889,
534
- "max_query_length": 234,
535
- "unique_queries": 7130,
536
- "num_query_images": 0,
537
- "min_relevant_docs_per_query": 1,
538
- "average_relevant_docs_per_query": 1.0,
539
- "max_relevant_docs_per_query": 1,
540
- "unique_relevant_docs": 3600
784
+ "number_of_characters": 278263,
785
+ "documents_text_statistics": null,
786
+ "documents_image_statistics": {
787
+ "min_image_width": 285,
788
+ "average_image_width": 618.7413888888889,
789
+ "max_image_width": 1092,
790
+ "min_image_height": 154,
791
+ "average_image_height": 506.8536111111111,
792
+ "max_image_height": 1076,
793
+ "unique_images": 3600
794
+ },
795
+ "queries_text_statistics": {
796
+ "total_text_length": 278263,
797
+ "min_text_length": 2,
798
+ "average_text_length": 38.64763888888889,
799
+ "max_text_length": 234,
800
+ "unique_texts": 7130
801
+ },
802
+ "queries_image_statistics": null,
803
+ "relevant_docs_statistics": {
804
+ "num_relevant_docs": 7200,
805
+ "min_relevant_docs_per_query": 1,
806
+ "average_relevant_docs_per_query": 1.0,
807
+ "max_relevant_docs_per_query": 1,
808
+ "unique_relevant_docs": 3600
809
+ },
810
+ "top_ranked_statistics": null
541
811
  },
542
812
  "ro": {
543
- "number_of_characters": 629977,
544
813
  "num_samples": 10723,
545
- "num_queries": 7123,
546
- "num_documents": 3600,
547
- "min_document_length": 0,
548
- "average_document_length": 0,
549
- "max_document_length": 0,
550
- "unique_documents": 0,
551
- "num_document_images": 3600,
552
- "min_query_length": 5,
553
- "average_query_length": 88.44265056858066,
554
- "max_query_length": 524,
555
- "unique_queries": 7122,
556
- "num_query_images": 0,
557
- "min_relevant_docs_per_query": 1,
558
- "average_relevant_docs_per_query": 1.0,
559
- "max_relevant_docs_per_query": 1,
560
- "unique_relevant_docs": 3600
814
+ "number_of_characters": 629977,
815
+ "documents_text_statistics": null,
816
+ "documents_image_statistics": {
817
+ "min_image_width": 285,
818
+ "average_image_width": 618.7413888888889,
819
+ "max_image_width": 1092,
820
+ "min_image_height": 154,
821
+ "average_image_height": 506.8536111111111,
822
+ "max_image_height": 1076,
823
+ "unique_images": 3600
824
+ },
825
+ "queries_text_statistics": {
826
+ "total_text_length": 629977,
827
+ "min_text_length": 5,
828
+ "average_text_length": 88.44265056858066,
829
+ "max_text_length": 524,
830
+ "unique_texts": 7122
831
+ },
832
+ "queries_image_statistics": null,
833
+ "relevant_docs_statistics": {
834
+ "num_relevant_docs": 7123,
835
+ "min_relevant_docs_per_query": 1,
836
+ "average_relevant_docs_per_query": 1.0,
837
+ "max_relevant_docs_per_query": 1,
838
+ "unique_relevant_docs": 3600
839
+ },
840
+ "top_ranked_statistics": null
561
841
  },
562
842
  "ru": {
563
- "number_of_characters": 477558,
564
843
  "num_samples": 10800,
565
- "num_queries": 7200,
566
- "num_documents": 3600,
567
- "min_document_length": 0,
568
- "average_document_length": 0,
569
- "max_document_length": 0,
570
- "unique_documents": 0,
571
- "num_document_images": 3600,
572
- "min_query_length": 11,
573
- "average_query_length": 66.3275,
574
- "max_query_length": 232,
575
- "unique_queries": 7194,
576
- "num_query_images": 0,
577
- "min_relevant_docs_per_query": 1,
578
- "average_relevant_docs_per_query": 1.0,
579
- "max_relevant_docs_per_query": 1,
580
- "unique_relevant_docs": 3600
844
+ "number_of_characters": 477558,
845
+ "documents_text_statistics": null,
846
+ "documents_image_statistics": {
847
+ "min_image_width": 285,
848
+ "average_image_width": 618.7413888888889,
849
+ "max_image_width": 1092,
850
+ "min_image_height": 154,
851
+ "average_image_height": 506.8536111111111,
852
+ "max_image_height": 1076,
853
+ "unique_images": 3600
854
+ },
855
+ "queries_text_statistics": {
856
+ "total_text_length": 477558,
857
+ "min_text_length": 11,
858
+ "average_text_length": 66.3275,
859
+ "max_text_length": 232,
860
+ "unique_texts": 7194
861
+ },
862
+ "queries_image_statistics": null,
863
+ "relevant_docs_statistics": {
864
+ "num_relevant_docs": 7200,
865
+ "min_relevant_docs_per_query": 1,
866
+ "average_relevant_docs_per_query": 1.0,
867
+ "max_relevant_docs_per_query": 1,
868
+ "unique_relevant_docs": 3600
869
+ },
870
+ "top_ranked_statistics": null
581
871
  },
582
872
  "sv": {
583
- "number_of_characters": 339400,
584
873
  "num_samples": 10873,
585
- "num_queries": 7273,
586
- "num_documents": 3600,
587
- "min_document_length": 0,
588
- "average_document_length": 0,
589
- "max_document_length": 0,
590
- "unique_documents": 0,
591
- "num_document_images": 3600,
592
- "min_query_length": 6,
593
- "average_query_length": 46.66575003437371,
594
- "max_query_length": 174,
595
- "unique_queries": 7199,
596
- "num_query_images": 0,
597
- "min_relevant_docs_per_query": 1,
598
- "average_relevant_docs_per_query": 1.0,
599
- "max_relevant_docs_per_query": 1,
600
- "unique_relevant_docs": 3600
874
+ "number_of_characters": 339400,
875
+ "documents_text_statistics": null,
876
+ "documents_image_statistics": {
877
+ "min_image_width": 285,
878
+ "average_image_width": 618.7413888888889,
879
+ "max_image_width": 1092,
880
+ "min_image_height": 154,
881
+ "average_image_height": 506.8536111111111,
882
+ "max_image_height": 1076,
883
+ "unique_images": 3600
884
+ },
885
+ "queries_text_statistics": {
886
+ "total_text_length": 339400,
887
+ "min_text_length": 6,
888
+ "average_text_length": 46.66575003437371,
889
+ "max_text_length": 174,
890
+ "unique_texts": 7199
891
+ },
892
+ "queries_image_statistics": null,
893
+ "relevant_docs_statistics": {
894
+ "num_relevant_docs": 7273,
895
+ "min_relevant_docs_per_query": 1,
896
+ "average_relevant_docs_per_query": 1.0,
897
+ "max_relevant_docs_per_query": 1,
898
+ "unique_relevant_docs": 3600
899
+ },
900
+ "top_ranked_statistics": null
601
901
  },
602
902
  "sw": {
603
- "number_of_characters": 444085,
604
903
  "num_samples": 10646,
605
- "num_queries": 7046,
606
- "num_documents": 3600,
607
- "min_document_length": 0,
608
- "average_document_length": 0,
609
- "max_document_length": 0,
610
- "unique_documents": 0,
611
- "num_document_images": 3600,
612
- "min_query_length": 2,
613
- "average_query_length": 63.026539880783425,
614
- "max_query_length": 299,
615
- "unique_queries": 7014,
616
- "num_query_images": 0,
617
- "min_relevant_docs_per_query": 1,
618
- "average_relevant_docs_per_query": 1.0,
619
- "max_relevant_docs_per_query": 1,
620
- "unique_relevant_docs": 3600
904
+ "number_of_characters": 444085,
905
+ "documents_text_statistics": null,
906
+ "documents_image_statistics": {
907
+ "min_image_width": 285,
908
+ "average_image_width": 618.7413888888889,
909
+ "max_image_width": 1092,
910
+ "min_image_height": 154,
911
+ "average_image_height": 506.8536111111111,
912
+ "max_image_height": 1076,
913
+ "unique_images": 3600
914
+ },
915
+ "queries_text_statistics": {
916
+ "total_text_length": 444085,
917
+ "min_text_length": 2,
918
+ "average_text_length": 63.026539880783425,
919
+ "max_text_length": 299,
920
+ "unique_texts": 7014
921
+ },
922
+ "queries_image_statistics": null,
923
+ "relevant_docs_statistics": {
924
+ "num_relevant_docs": 7046,
925
+ "min_relevant_docs_per_query": 1,
926
+ "average_relevant_docs_per_query": 1.0,
927
+ "max_relevant_docs_per_query": 1,
928
+ "unique_relevant_docs": 3600
929
+ },
930
+ "top_ranked_statistics": null
621
931
  },
622
932
  "te": {
623
- "number_of_characters": 341340,
624
933
  "num_samples": 10800,
625
- "num_queries": 7200,
626
- "num_documents": 3600,
627
- "min_document_length": 0,
628
- "average_document_length": 0,
629
- "max_document_length": 0,
630
- "unique_documents": 0,
631
- "num_document_images": 3600,
632
- "min_query_length": 16,
633
- "average_query_length": 47.40833333333333,
634
- "max_query_length": 132,
635
- "unique_queries": 7062,
636
- "num_query_images": 0,
637
- "min_relevant_docs_per_query": 1,
638
- "average_relevant_docs_per_query": 1.0,
639
- "max_relevant_docs_per_query": 1,
640
- "unique_relevant_docs": 3600
934
+ "number_of_characters": 341340,
935
+ "documents_text_statistics": null,
936
+ "documents_image_statistics": {
937
+ "min_image_width": 285,
938
+ "average_image_width": 618.7413888888889,
939
+ "max_image_width": 1092,
940
+ "min_image_height": 154,
941
+ "average_image_height": 506.8536111111111,
942
+ "max_image_height": 1076,
943
+ "unique_images": 3600
944
+ },
945
+ "queries_text_statistics": {
946
+ "total_text_length": 341340,
947
+ "min_text_length": 16,
948
+ "average_text_length": 47.40833333333333,
949
+ "max_text_length": 132,
950
+ "unique_texts": 7062
951
+ },
952
+ "queries_image_statistics": null,
953
+ "relevant_docs_statistics": {
954
+ "num_relevant_docs": 7200,
955
+ "min_relevant_docs_per_query": 1,
956
+ "average_relevant_docs_per_query": 1.0,
957
+ "max_relevant_docs_per_query": 1,
958
+ "unique_relevant_docs": 3600
959
+ },
960
+ "top_ranked_statistics": null
641
961
  },
642
962
  "th": {
643
- "number_of_characters": 344730,
644
963
  "num_samples": 10800,
645
- "num_queries": 7200,
646
- "num_documents": 3600,
647
- "min_document_length": 0,
648
- "average_document_length": 0,
649
- "max_document_length": 0,
650
- "unique_documents": 0,
651
- "num_document_images": 3600,
652
- "min_query_length": 8,
653
- "average_query_length": 47.87916666666667,
654
- "max_query_length": 147,
655
- "unique_queries": 7170,
656
- "num_query_images": 0,
657
- "min_relevant_docs_per_query": 1,
658
- "average_relevant_docs_per_query": 1.0,
659
- "max_relevant_docs_per_query": 1,
660
- "unique_relevant_docs": 3600
964
+ "number_of_characters": 344730,
965
+ "documents_text_statistics": null,
966
+ "documents_image_statistics": {
967
+ "min_image_width": 285,
968
+ "average_image_width": 618.7413888888889,
969
+ "max_image_width": 1092,
970
+ "min_image_height": 154,
971
+ "average_image_height": 506.8536111111111,
972
+ "max_image_height": 1076,
973
+ "unique_images": 3600
974
+ },
975
+ "queries_text_statistics": {
976
+ "total_text_length": 344730,
977
+ "min_text_length": 8,
978
+ "average_text_length": 47.87916666666667,
979
+ "max_text_length": 147,
980
+ "unique_texts": 7170
981
+ },
982
+ "queries_image_statistics": null,
983
+ "relevant_docs_statistics": {
984
+ "num_relevant_docs": 7200,
985
+ "min_relevant_docs_per_query": 1,
986
+ "average_relevant_docs_per_query": 1.0,
987
+ "max_relevant_docs_per_query": 1,
988
+ "unique_relevant_docs": 3600
989
+ },
990
+ "top_ranked_statistics": null
661
991
  },
662
992
  "tr": {
663
- "number_of_characters": 458639,
664
993
  "num_samples": 10833,
665
- "num_queries": 7233,
666
- "num_documents": 3600,
667
- "min_document_length": 0,
668
- "average_document_length": 0,
669
- "max_document_length": 0,
670
- "unique_documents": 0,
671
- "num_document_images": 3600,
672
- "min_query_length": 7,
673
- "average_query_length": 63.409235448638185,
674
- "max_query_length": 453,
675
- "unique_queries": 7224,
676
- "num_query_images": 0,
677
- "min_relevant_docs_per_query": 1,
678
- "average_relevant_docs_per_query": 1.0,
679
- "max_relevant_docs_per_query": 1,
680
- "unique_relevant_docs": 3600
994
+ "number_of_characters": 458639,
995
+ "documents_text_statistics": null,
996
+ "documents_image_statistics": {
997
+ "min_image_width": 285,
998
+ "average_image_width": 618.7413888888889,
999
+ "max_image_width": 1092,
1000
+ "min_image_height": 154,
1001
+ "average_image_height": 506.8536111111111,
1002
+ "max_image_height": 1076,
1003
+ "unique_images": 3600
1004
+ },
1005
+ "queries_text_statistics": {
1006
+ "total_text_length": 458639,
1007
+ "min_text_length": 7,
1008
+ "average_text_length": 63.409235448638185,
1009
+ "max_text_length": 453,
1010
+ "unique_texts": 7224
1011
+ },
1012
+ "queries_image_statistics": null,
1013
+ "relevant_docs_statistics": {
1014
+ "num_relevant_docs": 7233,
1015
+ "min_relevant_docs_per_query": 1,
1016
+ "average_relevant_docs_per_query": 1.0,
1017
+ "max_relevant_docs_per_query": 1,
1018
+ "unique_relevant_docs": 3600
1019
+ },
1020
+ "top_ranked_statistics": null
681
1021
  },
682
1022
  "uk": {
683
- "number_of_characters": 474311,
684
1023
  "num_samples": 10815,
685
- "num_queries": 7215,
686
- "num_documents": 3600,
687
- "min_document_length": 0,
688
- "average_document_length": 0,
689
- "max_document_length": 0,
690
- "unique_documents": 0,
691
- "num_document_images": 3600,
692
- "min_query_length": 10,
693
- "average_query_length": 65.73957033957034,
694
- "max_query_length": 372,
695
- "unique_queries": 7206,
696
- "num_query_images": 0,
697
- "min_relevant_docs_per_query": 1,
698
- "average_relevant_docs_per_query": 1.0,
699
- "max_relevant_docs_per_query": 1,
700
- "unique_relevant_docs": 3600
1024
+ "number_of_characters": 474311,
1025
+ "documents_text_statistics": null,
1026
+ "documents_image_statistics": {
1027
+ "min_image_width": 285,
1028
+ "average_image_width": 618.7413888888889,
1029
+ "max_image_width": 1092,
1030
+ "min_image_height": 154,
1031
+ "average_image_height": 506.8536111111111,
1032
+ "max_image_height": 1076,
1033
+ "unique_images": 3600
1034
+ },
1035
+ "queries_text_statistics": {
1036
+ "total_text_length": 474311,
1037
+ "min_text_length": 10,
1038
+ "average_text_length": 65.73957033957034,
1039
+ "max_text_length": 372,
1040
+ "unique_texts": 7206
1041
+ },
1042
+ "queries_image_statistics": null,
1043
+ "relevant_docs_statistics": {
1044
+ "num_relevant_docs": 7215,
1045
+ "min_relevant_docs_per_query": 1,
1046
+ "average_relevant_docs_per_query": 1.0,
1047
+ "max_relevant_docs_per_query": 1,
1048
+ "unique_relevant_docs": 3600
1049
+ },
1050
+ "top_ranked_statistics": null
701
1051
  },
702
1052
  "vi": {
703
- "number_of_characters": 582546,
704
1053
  "num_samples": 10950,
705
- "num_queries": 7350,
706
- "num_documents": 3600,
707
- "min_document_length": 0,
708
- "average_document_length": 0,
709
- "max_document_length": 0,
710
- "unique_documents": 0,
711
- "num_document_images": 3600,
712
- "min_query_length": 6,
713
- "average_query_length": 79.25795918367346,
714
- "max_query_length": 287,
715
- "unique_queries": 7350,
716
- "num_query_images": 0,
717
- "min_relevant_docs_per_query": 1,
718
- "average_relevant_docs_per_query": 1.0,
719
- "max_relevant_docs_per_query": 1,
720
- "unique_relevant_docs": 3600
1054
+ "number_of_characters": 582546,
1055
+ "documents_text_statistics": null,
1056
+ "documents_image_statistics": {
1057
+ "min_image_width": 285,
1058
+ "average_image_width": 618.7413888888889,
1059
+ "max_image_width": 1092,
1060
+ "min_image_height": 154,
1061
+ "average_image_height": 506.8536111111111,
1062
+ "max_image_height": 1076,
1063
+ "unique_images": 3600
1064
+ },
1065
+ "queries_text_statistics": {
1066
+ "total_text_length": 582546,
1067
+ "min_text_length": 6,
1068
+ "average_text_length": 79.25795918367346,
1069
+ "max_text_length": 287,
1070
+ "unique_texts": 7350
1071
+ },
1072
+ "queries_image_statistics": null,
1073
+ "relevant_docs_statistics": {
1074
+ "num_relevant_docs": 7350,
1075
+ "min_relevant_docs_per_query": 1,
1076
+ "average_relevant_docs_per_query": 1.0,
1077
+ "max_relevant_docs_per_query": 1,
1078
+ "unique_relevant_docs": 3600
1079
+ },
1080
+ "top_ranked_statistics": null
721
1081
  },
722
1082
  "zh": {
723
- "number_of_characters": 165110,
724
1083
  "num_samples": 10774,
725
- "num_queries": 7174,
726
- "num_documents": 3600,
727
- "min_document_length": 0,
728
- "average_document_length": 0,
729
- "max_document_length": 0,
730
- "unique_documents": 0,
731
- "num_document_images": 3600,
732
- "min_query_length": 1,
733
- "average_query_length": 23.01505436297742,
734
- "max_query_length": 96,
735
- "unique_queries": 7165,
736
- "num_query_images": 0,
737
- "min_relevant_docs_per_query": 1,
738
- "average_relevant_docs_per_query": 1.0,
739
- "max_relevant_docs_per_query": 1,
740
- "unique_relevant_docs": 3600
1084
+ "number_of_characters": 165110,
1085
+ "documents_text_statistics": null,
1086
+ "documents_image_statistics": {
1087
+ "min_image_width": 285,
1088
+ "average_image_width": 618.7413888888889,
1089
+ "max_image_width": 1092,
1090
+ "min_image_height": 154,
1091
+ "average_image_height": 506.8536111111111,
1092
+ "max_image_height": 1076,
1093
+ "unique_images": 3600
1094
+ },
1095
+ "queries_text_statistics": {
1096
+ "total_text_length": 165110,
1097
+ "min_text_length": 1,
1098
+ "average_text_length": 23.01505436297742,
1099
+ "max_text_length": 96,
1100
+ "unique_texts": 7165
1101
+ },
1102
+ "queries_image_statistics": null,
1103
+ "relevant_docs_statistics": {
1104
+ "num_relevant_docs": 7174,
1105
+ "min_relevant_docs_per_query": 1,
1106
+ "average_relevant_docs_per_query": 1.0,
1107
+ "max_relevant_docs_per_query": 1,
1108
+ "unique_relevant_docs": 3600
1109
+ },
1110
+ "top_ranked_statistics": null
741
1111
  }
742
1112
  }
743
1113
  }