mteb 2.0.4__py3-none-any.whl → 2.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. mteb/descriptive_stats/BitextMining/BUCC.json +70 -40
  2. mteb/descriptive_stats/Classification/DKHateClassification.json +40 -24
  3. mteb/descriptive_stats/Classification/FinancialPhrasebankClassification.json +23 -15
  4. mteb/descriptive_stats/Classification/ImdbClassification.json +40 -24
  5. mteb/descriptive_stats/Classification/KorHateClassification.json +23 -15
  6. mteb/descriptive_stats/Clustering/ArxivClusteringP2P.json +555 -550
  7. mteb/descriptive_stats/Clustering/ArxivClusteringP2P.v2.json +546 -541
  8. mteb/descriptive_stats/Clustering/ArxivClusteringS2S.json +555 -550
  9. mteb/descriptive_stats/Clustering/MLSUMClusteringP2P.json +2466 -2416
  10. mteb/descriptive_stats/Clustering/RedditClusteringP2P.json +1365 -1360
  11. mteb/descriptive_stats/Clustering/SNLClustering.json +378 -373
  12. mteb/descriptive_stats/Clustering/SwednClustering.json +28 -23
  13. mteb/descriptive_stats/Clustering/VGClustering.json +54 -49
  14. mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/WITT2IRetrieval.json +324 -204
  15. mteb/descriptive_stats/Image/Any2AnyRetrieval/MemotionI2TRetrieval.json +28 -18
  16. mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRAirbnbSyntheticRetrieval.json +334 -0
  17. mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRGitHubReadmeRetrieval.json +544 -0
  18. mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRTweetStockSyntheticsRetrieval.json +334 -0
  19. mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRWikimediaCommonsDocumentsRetrieval.json +634 -0
  20. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore2ESGReportsRetrieval.json +154 -0
  21. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore2EconomicsReportsRetrieval.json +154 -0
  22. mteb/descriptive_stats/Image/ImageClassification/Imagenet1k.json +6039 -3007
  23. mteb/descriptive_stats/Image/ZeroShotClassification/Imagenet1kZeroShot.json +3024 -3010
  24. mteb/descriptive_stats/Image/ZeroShotClassification/PatchCamelyonZeroShot.json +30 -16
  25. mteb/descriptive_stats/Reranking/MIRACLReranking.json +555 -479
  26. mteb/descriptive_stats/Reranking/MindSmallReranking.json +29 -25
  27. mteb/descriptive_stats/Retrieval/AlloprofRetrieval.json +25 -26
  28. mteb/descriptive_stats/Retrieval/Code1Retrieval.json +30 -0
  29. mteb/descriptive_stats/Retrieval/DanFEVER.json +25 -26
  30. mteb/descriptive_stats/Retrieval/EnglishFinance1Retrieval.json +30 -0
  31. mteb/descriptive_stats/Retrieval/EnglishFinance2Retrieval.json +30 -0
  32. mteb/descriptive_stats/Retrieval/EnglishFinance3Retrieval.json +30 -0
  33. mteb/descriptive_stats/Retrieval/EnglishFinance4Retrieval.json +30 -0
  34. mteb/descriptive_stats/Retrieval/EnglishHealthcare1Retrieval.json +30 -0
  35. mteb/descriptive_stats/Retrieval/French1Retrieval.json +30 -0
  36. mteb/descriptive_stats/Retrieval/FrenchLegal1Retrieval.json +30 -0
  37. mteb/descriptive_stats/Retrieval/German1Retrieval.json +30 -0
  38. mteb/descriptive_stats/Retrieval/GermanHealthcare1Retrieval.json +30 -0
  39. mteb/descriptive_stats/Retrieval/GermanLegal1Retrieval.json +30 -0
  40. mteb/descriptive_stats/Retrieval/JapaneseCode1Retrieval.json +30 -0
  41. mteb/descriptive_stats/Retrieval/JapaneseLegal1Retrieval.json +30 -0
  42. mteb/descriptive_stats/Retrieval/MIRACLRetrieval.json +475 -494
  43. mteb/descriptive_stats/Retrieval/MSMARCO-Fa.json +25 -26
  44. mteb/descriptive_stats/Retrieval/MSMARCO.json +25 -84
  45. mteb/descriptive_stats/Retrieval/Touche2020.json +25 -26
  46. mteb/descriptive_stats/Summarization/SummEval.json +27 -50
  47. mteb/descriptive_stats/Summarization/SummEvalFr.json +27 -50
  48. mteb/models/model_implementations/kalm_models.py +29 -0
  49. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  50. mteb/tasks/classification/eng/financial_phrasebank_classification.py +0 -3
  51. mteb/tasks/classification/kor/kor_hate_classification.py +0 -12
  52. mteb/tasks/clustering/swe/swedn_clustering.py +2 -2
  53. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  54. {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/METADATA +1 -1
  55. {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/RECORD +59 -40
  56. {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/WHEEL +0 -0
  57. {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/entry_points.txt +0 -0
  58. {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/licenses/LICENSE +0 -0
  59. {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/top_level.txt +0 -0
@@ -1,554 +1,630 @@
1
1
  {
2
2
  "dev": {
3
- "num_samples": 1260008,
4
- "number_of_characters": 584993395,
5
- "num_documents": 1247484,
6
- "min_document_length": 7,
7
- "average_document_length": 468.57240814311047,
8
- "max_document_length": 48058,
9
- "unique_documents": 1247484,
10
- "num_queries": 12524,
11
- "min_query_length": 5,
12
- "average_query_length": 36.47500798466943,
13
- "max_query_length": 176,
14
- "unique_queries": 12524,
15
- "none_queries": 0,
16
- "num_relevant_docs": 1247483,
17
- "min_relevant_docs_per_query": 0,
18
- "average_relevant_docs_per_query": 1.8850207601405302,
19
- "max_relevant_docs_per_query": 100,
20
- "unique_relevant_docs": 1247483,
21
- "num_instructions": null,
22
- "min_instruction_length": null,
23
- "average_instruction_length": null,
24
- "max_instruction_length": null,
25
- "unique_instructions": null,
26
- "num_top_ranked": 12524,
27
- "min_top_ranked_per_query": 1,
28
- "average_top_ranked_per_query": 99.60747365059086,
29
- "max_top_ranked_per_query": 100,
3
+ "num_samples": 1260007,
4
+ "number_of_characters": 584993390,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 584536582,
7
+ "min_text_length": 7,
8
+ "average_text_length": 468.57240814311047,
9
+ "max_text_length": 48058,
10
+ "unique_texts": 1039137
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 456808,
15
+ "min_text_length": 7,
16
+ "average_text_length": 36.47752136069632,
17
+ "max_text_length": 176,
18
+ "unique_texts": 12518
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 23608,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.885171284835902,
25
+ "max_relevant_docs_per_query": 100,
26
+ "unique_relevant_docs": 1247483
27
+ },
28
+ "top_ranked_statistics": {
29
+ "num_top_ranked": 1247484,
30
+ "min_top_ranked_per_query": 1,
31
+ "average_top_ranked_per_query": 99.61542761319173,
32
+ "max_top_ranked_per_query": 100
33
+ },
30
34
  "hf_subset_descriptive_stats": {
31
35
  "ar": {
32
36
  "num_samples": 290077,
33
37
  "number_of_characters": 139203930,
34
- "num_documents": 287181,
35
- "min_document_length": 9,
36
- "average_document_length": 484.42812720897274,
37
- "max_document_length": 48058,
38
- "unique_documents": 287181,
39
- "num_queries": 2896,
40
- "min_query_length": 12,
41
- "average_query_length": 29.480662983425415,
42
- "max_query_length": 101,
43
- "unique_queries": 2896,
44
- "none_queries": 0,
45
- "num_relevant_docs": 287181,
46
- "min_relevant_docs_per_query": 1,
47
- "average_relevant_docs_per_query": 1.7178867403314917,
48
- "max_relevant_docs_per_query": 100,
49
- "unique_relevant_docs": 287181,
50
- "num_instructions": null,
51
- "min_instruction_length": null,
52
- "average_instruction_length": null,
53
- "max_instruction_length": null,
54
- "unique_instructions": null,
55
- "num_top_ranked": 2896,
56
- "min_top_ranked_per_query": 1,
57
- "average_top_ranked_per_query": 99.16470994475138,
58
- "max_top_ranked_per_query": 100
38
+ "documents_text_statistics": {
39
+ "total_text_length": 139118554,
40
+ "min_text_length": 9,
41
+ "average_text_length": 484.42812720897274,
42
+ "max_text_length": 48058,
43
+ "unique_texts": 216899
44
+ },
45
+ "documents_image_statistics": null,
46
+ "queries_text_statistics": {
47
+ "total_text_length": 85376,
48
+ "min_text_length": 12,
49
+ "average_text_length": 29.480662983425415,
50
+ "max_text_length": 101,
51
+ "unique_texts": 2896
52
+ },
53
+ "queries_image_statistics": null,
54
+ "relevant_docs_statistics": {
55
+ "num_relevant_docs": 4975,
56
+ "min_relevant_docs_per_query": 1,
57
+ "average_relevant_docs_per_query": 1.7178867403314917,
58
+ "max_relevant_docs_per_query": 100,
59
+ "unique_relevant_docs": 287181
60
+ },
61
+ "top_ranked_statistics": {
62
+ "num_top_ranked": 287181,
63
+ "min_top_ranked_per_query": 1,
64
+ "average_top_ranked_per_query": 99.16470994475138,
65
+ "max_top_ranked_per_query": 100
66
+ }
59
67
  },
60
68
  "bn": {
61
69
  "num_samples": 41466,
62
70
  "number_of_characters": 22936306,
63
- "num_documents": 41055,
64
- "min_document_length": 12,
65
- "average_document_length": 558.202338326635,
66
- "max_document_length": 16749,
67
- "unique_documents": 41055,
68
- "num_queries": 411,
69
- "min_query_length": 16,
70
- "average_query_length": 46.98053527980535,
71
- "max_query_length": 112,
72
- "unique_queries": 411,
73
- "none_queries": 0,
74
- "num_relevant_docs": 41055,
75
- "min_relevant_docs_per_query": 55,
76
- "average_relevant_docs_per_query": 1.9172749391727495,
77
- "max_relevant_docs_per_query": 100,
78
- "unique_relevant_docs": 41055,
79
- "num_instructions": null,
80
- "min_instruction_length": null,
81
- "average_instruction_length": null,
82
- "max_instruction_length": null,
83
- "unique_instructions": null,
84
- "num_top_ranked": 411,
85
- "min_top_ranked_per_query": 55,
86
- "average_top_ranked_per_query": 99.8905109489051,
87
- "max_top_ranked_per_query": 100
71
+ "documents_text_statistics": {
72
+ "total_text_length": 22916997,
73
+ "min_text_length": 12,
74
+ "average_text_length": 558.202338326635,
75
+ "max_text_length": 16749,
76
+ "unique_texts": 31124
77
+ },
78
+ "documents_image_statistics": null,
79
+ "queries_text_statistics": {
80
+ "total_text_length": 19309,
81
+ "min_text_length": 16,
82
+ "average_text_length": 46.98053527980535,
83
+ "max_text_length": 112,
84
+ "unique_texts": 411
85
+ },
86
+ "queries_image_statistics": null,
87
+ "relevant_docs_statistics": {
88
+ "num_relevant_docs": 788,
89
+ "min_relevant_docs_per_query": 55,
90
+ "average_relevant_docs_per_query": 1.9172749391727495,
91
+ "max_relevant_docs_per_query": 100,
92
+ "unique_relevant_docs": 41055
93
+ },
94
+ "top_ranked_statistics": {
95
+ "num_top_ranked": 41055,
96
+ "min_top_ranked_per_query": 55,
97
+ "average_top_ranked_per_query": 99.8905109489051,
98
+ "max_top_ranked_per_query": 100
99
+ }
88
100
  },
89
101
  "de": {
90
102
  "num_samples": 30704,
91
103
  "number_of_characters": 16502961,
92
- "num_documents": 30400,
93
- "min_document_length": 13,
94
- "average_document_length": 542.3999013157895,
95
- "max_document_length": 5224,
96
- "unique_documents": 30400,
97
- "num_queries": 304,
98
- "min_query_length": 15,
99
- "average_query_length": 46.06578947368421,
100
- "max_query_length": 87,
101
- "unique_queries": 304,
102
- "none_queries": 0,
103
- "num_relevant_docs": 30400,
104
- "min_relevant_docs_per_query": 100,
105
- "average_relevant_docs_per_query": 1.542763157894737,
106
- "max_relevant_docs_per_query": 100,
107
- "unique_relevant_docs": 30400,
108
- "num_instructions": null,
109
- "min_instruction_length": null,
110
- "average_instruction_length": null,
111
- "max_instruction_length": null,
112
- "unique_instructions": null,
113
- "num_top_ranked": 304,
114
- "min_top_ranked_per_query": 100,
115
- "average_top_ranked_per_query": 100.0,
116
- "max_top_ranked_per_query": 100
104
+ "documents_text_statistics": {
105
+ "total_text_length": 16488957,
106
+ "min_text_length": 13,
107
+ "average_text_length": 542.3999013157895,
108
+ "max_text_length": 5224,
109
+ "unique_texts": 29539
110
+ },
111
+ "documents_image_statistics": null,
112
+ "queries_text_statistics": {
113
+ "total_text_length": 14004,
114
+ "min_text_length": 15,
115
+ "average_text_length": 46.06578947368421,
116
+ "max_text_length": 87,
117
+ "unique_texts": 303
118
+ },
119
+ "queries_image_statistics": null,
120
+ "relevant_docs_statistics": {
121
+ "num_relevant_docs": 469,
122
+ "min_relevant_docs_per_query": 100,
123
+ "average_relevant_docs_per_query": 1.542763157894737,
124
+ "max_relevant_docs_per_query": 100,
125
+ "unique_relevant_docs": 30400
126
+ },
127
+ "top_ranked_statistics": {
128
+ "num_top_ranked": 30400,
129
+ "min_top_ranked_per_query": 100,
130
+ "average_top_ranked_per_query": 100.0,
131
+ "max_top_ranked_per_query": 100
132
+ }
117
133
  },
118
134
  "en": {
119
135
  "num_samples": 79487,
120
136
  "number_of_characters": 51198664,
121
- "num_documents": 78700,
122
- "min_document_length": 19,
123
- "average_document_length": 650.1517153748412,
124
- "max_document_length": 8110,
125
- "unique_documents": 78700,
126
- "num_queries": 787,
127
- "min_query_length": 16,
128
- "average_query_length": 40.31003811944092,
129
- "max_query_length": 122,
130
- "unique_queries": 787,
131
- "none_queries": 0,
132
- "num_relevant_docs": 78700,
133
- "min_relevant_docs_per_query": 100,
134
- "average_relevant_docs_per_query": 2.3824650571791612,
135
- "max_relevant_docs_per_query": 100,
136
- "unique_relevant_docs": 78700,
137
- "num_instructions": null,
138
- "min_instruction_length": null,
139
- "average_instruction_length": null,
140
- "max_instruction_length": null,
141
- "unique_instructions": null,
142
- "num_top_ranked": 787,
143
- "min_top_ranked_per_query": 100,
144
- "average_top_ranked_per_query": 100.0,
145
- "max_top_ranked_per_query": 100
137
+ "documents_text_statistics": {
138
+ "total_text_length": 51166940,
139
+ "min_text_length": 19,
140
+ "average_text_length": 650.1517153748412,
141
+ "max_text_length": 8110,
142
+ "unique_texts": 75329
143
+ },
144
+ "documents_image_statistics": null,
145
+ "queries_text_statistics": {
146
+ "total_text_length": 31724,
147
+ "min_text_length": 16,
148
+ "average_text_length": 40.31003811944092,
149
+ "max_text_length": 122,
150
+ "unique_texts": 787
151
+ },
152
+ "queries_image_statistics": null,
153
+ "relevant_docs_statistics": {
154
+ "num_relevant_docs": 1875,
155
+ "min_relevant_docs_per_query": 100,
156
+ "average_relevant_docs_per_query": 2.3824650571791612,
157
+ "max_relevant_docs_per_query": 100,
158
+ "unique_relevant_docs": 78700
159
+ },
160
+ "top_ranked_statistics": {
161
+ "num_top_ranked": 78700,
162
+ "min_top_ranked_per_query": 100,
163
+ "average_top_ranked_per_query": 100.0,
164
+ "max_top_ranked_per_query": 100
165
+ }
146
166
  },
147
167
  "es": {
148
168
  "num_samples": 62317,
149
169
  "number_of_characters": 34643777,
150
- "num_documents": 61700,
151
- "min_document_length": 21,
152
- "average_document_length": 561.0117341977309,
153
- "max_document_length": 21550,
154
- "unique_documents": 61700,
155
- "num_queries": 617,
156
- "min_query_length": 19,
157
- "average_query_length": 47.573743922204216,
158
- "max_query_length": 88,
159
- "unique_queries": 617,
160
- "none_queries": 0,
161
- "num_relevant_docs": 61700,
162
- "min_relevant_docs_per_query": 100,
163
- "average_relevant_docs_per_query": 3.053484602917342,
164
- "max_relevant_docs_per_query": 100,
165
- "unique_relevant_docs": 61700,
166
- "num_instructions": null,
167
- "min_instruction_length": null,
168
- "average_instruction_length": null,
169
- "max_instruction_length": null,
170
- "unique_instructions": null,
171
- "num_top_ranked": 617,
172
- "min_top_ranked_per_query": 100,
173
- "average_top_ranked_per_query": 100.0,
174
- "max_top_ranked_per_query": 100
170
+ "documents_text_statistics": {
171
+ "total_text_length": 34614424,
172
+ "min_text_length": 21,
173
+ "average_text_length": 561.0117341977309,
174
+ "max_text_length": 21550,
175
+ "unique_texts": 59856
176
+ },
177
+ "documents_image_statistics": null,
178
+ "queries_text_statistics": {
179
+ "total_text_length": 29353,
180
+ "min_text_length": 19,
181
+ "average_text_length": 47.573743922204216,
182
+ "max_text_length": 88,
183
+ "unique_texts": 617
184
+ },
185
+ "queries_image_statistics": null,
186
+ "relevant_docs_statistics": {
187
+ "num_relevant_docs": 1884,
188
+ "min_relevant_docs_per_query": 100,
189
+ "average_relevant_docs_per_query": 3.053484602917342,
190
+ "max_relevant_docs_per_query": 100,
191
+ "unique_relevant_docs": 61700
192
+ },
193
+ "top_ranked_statistics": {
194
+ "num_top_ranked": 61700,
195
+ "min_top_ranked_per_query": 100,
196
+ "average_top_ranked_per_query": 100.0,
197
+ "max_top_ranked_per_query": 100
198
+ }
175
199
  },
176
200
  "fa": {
177
201
  "num_samples": 63832,
178
202
  "number_of_characters": 27767876,
179
- "num_documents": 63200,
180
- "min_document_length": 14,
181
- "average_document_length": 438.95362341772153,
182
- "max_document_length": 8151,
183
- "unique_documents": 63200,
184
- "num_queries": 632,
185
- "min_query_length": 18,
186
- "average_query_length": 41.1503164556962,
187
- "max_query_length": 82,
188
- "unique_queries": 632,
189
- "none_queries": 0,
190
- "num_relevant_docs": 63200,
191
- "min_relevant_docs_per_query": 100,
192
- "average_relevant_docs_per_query": 1.4667721518987342,
193
- "max_relevant_docs_per_query": 100,
194
- "unique_relevant_docs": 63200,
195
- "num_instructions": null,
196
- "min_instruction_length": null,
197
- "average_instruction_length": null,
198
- "max_instruction_length": null,
199
- "unique_instructions": null,
200
- "num_top_ranked": 632,
201
- "min_top_ranked_per_query": 100,
202
- "average_top_ranked_per_query": 100.0,
203
- "max_top_ranked_per_query": 100
203
+ "documents_text_statistics": {
204
+ "total_text_length": 27741869,
205
+ "min_text_length": 14,
206
+ "average_text_length": 438.95362341772153,
207
+ "max_text_length": 8151,
208
+ "unique_texts": 57660
209
+ },
210
+ "documents_image_statistics": null,
211
+ "queries_text_statistics": {
212
+ "total_text_length": 26007,
213
+ "min_text_length": 18,
214
+ "average_text_length": 41.1503164556962,
215
+ "max_text_length": 82,
216
+ "unique_texts": 631
217
+ },
218
+ "queries_image_statistics": null,
219
+ "relevant_docs_statistics": {
220
+ "num_relevant_docs": 927,
221
+ "min_relevant_docs_per_query": 100,
222
+ "average_relevant_docs_per_query": 1.4667721518987342,
223
+ "max_relevant_docs_per_query": 100,
224
+ "unique_relevant_docs": 63200
225
+ },
226
+ "top_ranked_statistics": {
227
+ "num_top_ranked": 63200,
228
+ "min_top_ranked_per_query": 100,
229
+ "average_top_ranked_per_query": 100.0,
230
+ "max_top_ranked_per_query": 100
231
+ }
204
232
  },
205
233
  "fi": {
206
234
  "num_samples": 117879,
207
235
  "number_of_characters": 52848523,
208
- "num_documents": 116696,
209
- "min_document_length": 13,
210
- "average_document_length": 452.48052204017273,
211
- "max_document_length": 6755,
212
- "unique_documents": 116696,
213
- "num_queries": 1183,
214
- "min_query_length": 14,
215
- "average_query_length": 38.76246830092984,
216
- "max_query_length": 130,
217
- "unique_queries": 1183,
218
- "none_queries": 0,
219
- "num_relevant_docs": 116696,
220
- "min_relevant_docs_per_query": 3,
221
- "average_relevant_docs_per_query": 1.7557058326289094,
222
- "max_relevant_docs_per_query": 100,
223
- "unique_relevant_docs": 116696,
224
- "num_instructions": null,
225
- "min_instruction_length": null,
226
- "average_instruction_length": null,
227
- "max_instruction_length": null,
228
- "unique_instructions": null,
229
- "num_top_ranked": 1183,
230
- "min_top_ranked_per_query": 3,
231
- "average_top_ranked_per_query": 98.64412510566356,
232
- "max_top_ranked_per_query": 100
236
+ "documents_text_statistics": {
237
+ "total_text_length": 52802667,
238
+ "min_text_length": 13,
239
+ "average_text_length": 452.48052204017273,
240
+ "max_text_length": 6755,
241
+ "unique_texts": 97195
242
+ },
243
+ "documents_image_statistics": null,
244
+ "queries_text_statistics": {
245
+ "total_text_length": 45856,
246
+ "min_text_length": 14,
247
+ "average_text_length": 38.76246830092984,
248
+ "max_text_length": 130,
249
+ "unique_texts": 1183
250
+ },
251
+ "queries_image_statistics": null,
252
+ "relevant_docs_statistics": {
253
+ "num_relevant_docs": 2077,
254
+ "min_relevant_docs_per_query": 3,
255
+ "average_relevant_docs_per_query": 1.7557058326289094,
256
+ "max_relevant_docs_per_query": 100,
257
+ "unique_relevant_docs": 116696
258
+ },
259
+ "top_ranked_statistics": {
260
+ "num_top_ranked": 116696,
261
+ "min_top_ranked_per_query": 3,
262
+ "average_top_ranked_per_query": 98.64412510566356,
263
+ "max_top_ranked_per_query": 100
264
+ }
233
265
  },
234
266
  "fr": {
235
267
  "num_samples": 34643,
236
268
  "number_of_characters": 17084953,
237
- "num_documents": 34300,
238
- "min_document_length": 25,
239
- "average_document_length": 497.66475218658894,
240
- "max_document_length": 4404,
241
- "unique_documents": 34300,
242
- "num_queries": 343,
243
- "min_query_length": 16,
244
- "average_query_length": 43.883381924198254,
245
- "max_query_length": 83,
246
- "unique_queries": 343,
247
- "none_queries": 0,
248
- "num_relevant_docs": 34300,
249
- "min_relevant_docs_per_query": 100,
250
- "average_relevant_docs_per_query": 1.3877551020408163,
251
- "max_relevant_docs_per_query": 100,
252
- "unique_relevant_docs": 34300,
253
- "num_instructions": null,
254
- "min_instruction_length": null,
255
- "average_instruction_length": null,
256
- "max_instruction_length": null,
257
- "unique_instructions": null,
258
- "num_top_ranked": 343,
259
- "min_top_ranked_per_query": 100,
260
- "average_top_ranked_per_query": 100.0,
261
- "max_top_ranked_per_query": 100
269
+ "documents_text_statistics": {
270
+ "total_text_length": 17069901,
271
+ "min_text_length": 25,
272
+ "average_text_length": 497.66475218658894,
273
+ "max_text_length": 4404,
274
+ "unique_texts": 33762
275
+ },
276
+ "documents_image_statistics": null,
277
+ "queries_text_statistics": {
278
+ "total_text_length": 15052,
279
+ "min_text_length": 16,
280
+ "average_text_length": 43.883381924198254,
281
+ "max_text_length": 83,
282
+ "unique_texts": 343
283
+ },
284
+ "queries_image_statistics": null,
285
+ "relevant_docs_statistics": {
286
+ "num_relevant_docs": 476,
287
+ "min_relevant_docs_per_query": 100,
288
+ "average_relevant_docs_per_query": 1.3877551020408163,
289
+ "max_relevant_docs_per_query": 100,
290
+ "unique_relevant_docs": 34300
291
+ },
292
+ "top_ranked_statistics": {
293
+ "num_top_ranked": 34300,
294
+ "min_top_ranked_per_query": 100,
295
+ "average_top_ranked_per_query": 100.0,
296
+ "max_top_ranked_per_query": 100
297
+ }
262
298
  },
263
299
  "hi": {
264
300
  "num_samples": 35350,
265
301
  "number_of_characters": 21162593,
266
- "num_documents": 35000,
267
- "min_document_length": 13,
268
- "average_document_length": 604.1121142857143,
269
- "max_document_length": 29681,
270
- "unique_documents": 35000,
271
- "num_queries": 350,
272
- "min_query_length": 24,
273
- "average_query_length": 53.34,
274
- "max_query_length": 120,
275
- "unique_queries": 350,
276
- "none_queries": 0,
277
- "num_relevant_docs": 35000,
278
- "min_relevant_docs_per_query": 100,
279
- "average_relevant_docs_per_query": 1.9142857142857144,
280
- "max_relevant_docs_per_query": 100,
281
- "unique_relevant_docs": 35000,
282
- "num_instructions": null,
283
- "min_instruction_length": null,
284
- "average_instruction_length": null,
285
- "max_instruction_length": null,
286
- "unique_instructions": null,
287
- "num_top_ranked": 350,
288
- "min_top_ranked_per_query": 100,
289
- "average_top_ranked_per_query": 100.0,
290
- "max_top_ranked_per_query": 100
302
+ "documents_text_statistics": {
303
+ "total_text_length": 21143924,
304
+ "min_text_length": 13,
305
+ "average_text_length": 604.1121142857143,
306
+ "max_text_length": 29681,
307
+ "unique_texts": 30405
308
+ },
309
+ "documents_image_statistics": null,
310
+ "queries_text_statistics": {
311
+ "total_text_length": 18669,
312
+ "min_text_length": 24,
313
+ "average_text_length": 53.34,
314
+ "max_text_length": 120,
315
+ "unique_texts": 350
316
+ },
317
+ "queries_image_statistics": null,
318
+ "relevant_docs_statistics": {
319
+ "num_relevant_docs": 670,
320
+ "min_relevant_docs_per_query": 100,
321
+ "average_relevant_docs_per_query": 1.9142857142857144,
322
+ "max_relevant_docs_per_query": 100,
323
+ "unique_relevant_docs": 35000
324
+ },
325
+ "top_ranked_statistics": {
326
+ "num_top_ranked": 35000,
327
+ "min_top_ranked_per_query": 100,
328
+ "average_top_ranked_per_query": 100.0,
329
+ "max_top_ranked_per_query": 100
330
+ }
291
331
  },
292
332
  "id": {
293
333
  "num_samples": 94149,
294
334
  "number_of_characters": 51428701,
295
- "num_documents": 93210,
296
- "min_document_length": 9,
297
- "average_document_length": 551.3677395129278,
298
- "max_document_length": 13961,
299
- "unique_documents": 93210,
300
- "num_queries": 939,
301
- "min_query_length": 13,
302
- "average_query_length": 38.03407880724175,
303
- "max_query_length": 93,
304
- "unique_queries": 939,
305
- "none_queries": 0,
306
- "num_relevant_docs": 93210,
307
- "min_relevant_docs_per_query": 3,
308
- "average_relevant_docs_per_query": 2.774227902023429,
309
- "max_relevant_docs_per_query": 100,
310
- "unique_relevant_docs": 93210,
311
- "num_instructions": null,
312
- "min_instruction_length": null,
313
- "average_instruction_length": null,
314
- "max_instruction_length": null,
315
- "unique_instructions": null,
316
- "num_top_ranked": 939,
317
- "min_top_ranked_per_query": 3,
318
- "average_top_ranked_per_query": 99.26517571884985,
319
- "max_top_ranked_per_query": 100
335
+ "documents_text_statistics": {
336
+ "total_text_length": 51392987,
337
+ "min_text_length": 9,
338
+ "average_text_length": 551.3677395129278,
339
+ "max_text_length": 13961,
340
+ "unique_texts": 76938
341
+ },
342
+ "documents_image_statistics": null,
343
+ "queries_text_statistics": {
344
+ "total_text_length": 35714,
345
+ "min_text_length": 13,
346
+ "average_text_length": 38.03407880724175,
347
+ "max_text_length": 93,
348
+ "unique_texts": 939
349
+ },
350
+ "queries_image_statistics": null,
351
+ "relevant_docs_statistics": {
352
+ "num_relevant_docs": 2605,
353
+ "min_relevant_docs_per_query": 3,
354
+ "average_relevant_docs_per_query": 2.774227902023429,
355
+ "max_relevant_docs_per_query": 100,
356
+ "unique_relevant_docs": 93210
357
+ },
358
+ "top_ranked_statistics": {
359
+ "num_top_ranked": 93210,
360
+ "min_top_ranked_per_query": 3,
361
+ "average_top_ranked_per_query": 99.26517571884985,
362
+ "max_top_ranked_per_query": 100
363
+ }
320
364
  },
321
365
  "ja": {
322
366
  "num_samples": 80497,
323
367
  "number_of_characters": 17053080,
324
- "num_documents": 79700,
325
- "min_document_length": 7,
326
- "average_document_length": 213.78840652446675,
327
- "max_document_length": 6592,
328
- "unique_documents": 79700,
329
- "num_queries": 797,
330
- "min_query_length": 7,
331
- "average_query_length": 17.7465495608532,
332
- "max_query_length": 48,
333
- "unique_queries": 797,
334
- "none_queries": 0,
335
- "num_relevant_docs": 79700,
336
- "min_relevant_docs_per_query": 100,
337
- "average_relevant_docs_per_query": 1.7465495608531996,
338
- "max_relevant_docs_per_query": 100,
339
- "unique_relevant_docs": 79700,
340
- "num_instructions": null,
341
- "min_instruction_length": null,
342
- "average_instruction_length": null,
343
- "max_instruction_length": null,
344
- "unique_instructions": null,
345
- "num_top_ranked": 797,
346
- "min_top_ranked_per_query": 100,
347
- "average_top_ranked_per_query": 100.0,
348
- "max_top_ranked_per_query": 100
368
+ "documents_text_statistics": {
369
+ "total_text_length": 17038936,
370
+ "min_text_length": 7,
371
+ "average_text_length": 213.78840652446675,
372
+ "max_text_length": 6592,
373
+ "unique_texts": 74579
374
+ },
375
+ "documents_image_statistics": null,
376
+ "queries_text_statistics": {
377
+ "total_text_length": 14144,
378
+ "min_text_length": 7,
379
+ "average_text_length": 17.7465495608532,
380
+ "max_text_length": 48,
381
+ "unique_texts": 797
382
+ },
383
+ "queries_image_statistics": null,
384
+ "relevant_docs_statistics": {
385
+ "num_relevant_docs": 1392,
386
+ "min_relevant_docs_per_query": 100,
387
+ "average_relevant_docs_per_query": 1.7465495608531996,
388
+ "max_relevant_docs_per_query": 100,
389
+ "unique_relevant_docs": 79700
390
+ },
391
+ "top_ranked_statistics": {
392
+ "num_top_ranked": 79700,
393
+ "min_top_ranked_per_query": 100,
394
+ "average_top_ranked_per_query": 100.0,
395
+ "max_top_ranked_per_query": 100
396
+ }
349
397
  },
350
398
  "ko": {
351
- "num_samples": 21414,
352
- "number_of_characters": 5439444,
353
- "num_documents": 21201,
354
- "min_document_length": 11,
355
- "average_document_length": 256.34819112306025,
356
- "max_document_length": 4838,
357
- "unique_documents": 21201,
358
- "num_queries": 213,
359
- "min_query_length": 5,
360
- "average_query_length": 21.624413145539908,
361
- "max_query_length": 92,
362
- "unique_queries": 213,
363
- "none_queries": 0,
364
- "num_relevant_docs": 21200,
365
- "min_relevant_docs_per_query": 0,
366
- "average_relevant_docs_per_query": 1.9812206572769953,
367
- "max_relevant_docs_per_query": 100,
368
- "unique_relevant_docs": 21200,
369
- "num_instructions": null,
370
- "min_instruction_length": null,
371
- "average_instruction_length": null,
372
- "max_instruction_length": null,
373
- "unique_instructions": null,
374
- "num_top_ranked": 213,
375
- "min_top_ranked_per_query": 1,
376
- "average_top_ranked_per_query": 99.53521126760563,
377
- "max_top_ranked_per_query": 100
399
+ "num_samples": 21413,
400
+ "number_of_characters": 5439439,
401
+ "documents_text_statistics": {
402
+ "total_text_length": 5434838,
403
+ "min_text_length": 11,
404
+ "average_text_length": 256.34819112306025,
405
+ "max_text_length": 4838,
406
+ "unique_texts": 17609
407
+ },
408
+ "documents_image_statistics": null,
409
+ "queries_text_statistics": {
410
+ "total_text_length": 4601,
411
+ "min_text_length": 8,
412
+ "average_text_length": 21.702830188679247,
413
+ "max_text_length": 92,
414
+ "unique_texts": 212
415
+ },
416
+ "queries_image_statistics": null,
417
+ "relevant_docs_statistics": {
418
+ "num_relevant_docs": 422,
419
+ "min_relevant_docs_per_query": 100,
420
+ "average_relevant_docs_per_query": 1.990566037735849,
421
+ "max_relevant_docs_per_query": 100,
422
+ "unique_relevant_docs": 21200
423
+ },
424
+ "top_ranked_statistics": {
425
+ "num_top_ranked": 21201,
426
+ "min_top_ranked_per_query": 1,
427
+ "average_top_ranked_per_query": 100.00471698113208,
428
+ "max_top_ranked_per_query": 100
429
+ }
378
430
  },
379
431
  "ru": {
380
432
  "num_samples": 125947,
381
433
  "number_of_characters": 59556512,
382
- "num_documents": 124700,
383
- "min_document_length": 8,
384
- "average_document_length": 477.1567441860465,
385
- "max_document_length": 12427,
386
- "unique_documents": 124700,
387
- "num_queries": 1247,
388
- "min_query_length": 15,
389
- "average_query_length": 44.15878107457899,
390
- "max_query_length": 108,
391
- "unique_queries": 1247,
392
- "none_queries": 0,
393
- "num_relevant_docs": 124700,
394
- "min_relevant_docs_per_query": 100,
395
- "average_relevant_docs_per_query": 1.9534883720930232,
396
- "max_relevant_docs_per_query": 100,
397
- "unique_relevant_docs": 124700,
398
- "num_instructions": null,
399
- "min_instruction_length": null,
400
- "average_instruction_length": null,
401
- "max_instruction_length": null,
402
- "unique_instructions": null,
403
- "num_top_ranked": 1247,
404
- "min_top_ranked_per_query": 100,
405
- "average_top_ranked_per_query": 100.0,
406
- "max_top_ranked_per_query": 100
434
+ "documents_text_statistics": {
435
+ "total_text_length": 59501446,
436
+ "min_text_length": 8,
437
+ "average_text_length": 477.1567441860465,
438
+ "max_text_length": 12427,
439
+ "unique_texts": 113624
440
+ },
441
+ "documents_image_statistics": null,
442
+ "queries_text_statistics": {
443
+ "total_text_length": 55066,
444
+ "min_text_length": 15,
445
+ "average_text_length": 44.15878107457899,
446
+ "max_text_length": 108,
447
+ "unique_texts": 1247
448
+ },
449
+ "queries_image_statistics": null,
450
+ "relevant_docs_statistics": {
451
+ "num_relevant_docs": 2436,
452
+ "min_relevant_docs_per_query": 100,
453
+ "average_relevant_docs_per_query": 1.9534883720930232,
454
+ "max_relevant_docs_per_query": 100,
455
+ "unique_relevant_docs": 124700
456
+ },
457
+ "top_ranked_statistics": {
458
+ "num_top_ranked": 124700,
459
+ "min_top_ranked_per_query": 100,
460
+ "average_top_ranked_per_query": 100.0,
461
+ "max_top_ranked_per_query": 100
462
+ }
407
463
  },
408
464
  "sw": {
409
465
  "num_samples": 48581,
410
466
  "number_of_characters": 14840684,
411
- "num_documents": 48100,
412
- "min_document_length": 10,
413
- "average_document_length": 308.14927234927234,
414
- "max_document_length": 6048,
415
- "unique_documents": 48100,
416
- "num_queries": 481,
417
- "min_query_length": 13,
418
- "average_query_length": 38.88565488565489,
419
- "max_query_length": 75,
420
- "unique_queries": 481,
421
- "none_queries": 0,
422
- "num_relevant_docs": 48100,
423
- "min_relevant_docs_per_query": 100,
424
- "average_relevant_docs_per_query": 1.3846153846153846,
425
- "max_relevant_docs_per_query": 100,
426
- "unique_relevant_docs": 48100,
427
- "num_instructions": null,
428
- "min_instruction_length": null,
429
- "average_instruction_length": null,
430
- "max_instruction_length": null,
431
- "unique_instructions": null,
432
- "num_top_ranked": 481,
433
- "min_top_ranked_per_query": 100,
434
- "average_top_ranked_per_query": 100.0,
435
- "max_top_ranked_per_query": 100
467
+ "documents_text_statistics": {
468
+ "total_text_length": 14821980,
469
+ "min_text_length": 10,
470
+ "average_text_length": 308.14927234927234,
471
+ "max_text_length": 6048,
472
+ "unique_texts": 24242
473
+ },
474
+ "documents_image_statistics": null,
475
+ "queries_text_statistics": {
476
+ "total_text_length": 18704,
477
+ "min_text_length": 13,
478
+ "average_text_length": 38.88565488565489,
479
+ "max_text_length": 75,
480
+ "unique_texts": 481
481
+ },
482
+ "queries_image_statistics": null,
483
+ "relevant_docs_statistics": {
484
+ "num_relevant_docs": 666,
485
+ "min_relevant_docs_per_query": 100,
486
+ "average_relevant_docs_per_query": 1.3846153846153846,
487
+ "max_relevant_docs_per_query": 100,
488
+ "unique_relevant_docs": 48100
489
+ },
490
+ "top_ranked_statistics": {
491
+ "num_top_ranked": 48100,
492
+ "min_top_ranked_per_query": 100,
493
+ "average_top_ranked_per_query": 100.0,
494
+ "max_top_ranked_per_query": 100
495
+ }
436
496
  },
437
497
  "te": {
438
498
  "num_samples": 8484,
439
499
  "number_of_characters": 3910478,
440
- "num_documents": 8400,
441
- "min_document_length": 19,
442
- "average_document_length": 465.1484523809524,
443
- "max_document_length": 8736,
444
- "unique_documents": 8400,
445
- "num_queries": 84,
446
- "min_query_length": 24,
447
- "average_query_length": 38.464285714285715,
448
- "max_query_length": 64,
449
- "unique_queries": 84,
450
- "none_queries": 0,
451
- "num_relevant_docs": 8400,
452
- "min_relevant_docs_per_query": 100,
453
- "average_relevant_docs_per_query": 1.119047619047619,
454
- "max_relevant_docs_per_query": 100,
455
- "unique_relevant_docs": 8400,
456
- "num_instructions": null,
457
- "min_instruction_length": null,
458
- "average_instruction_length": null,
459
- "max_instruction_length": null,
460
- "unique_instructions": null,
461
- "num_top_ranked": 84,
462
- "min_top_ranked_per_query": 100,
463
- "average_top_ranked_per_query": 100.0,
464
- "max_top_ranked_per_query": 100
500
+ "documents_text_statistics": {
501
+ "total_text_length": 3907247,
502
+ "min_text_length": 19,
503
+ "average_text_length": 465.1484523809524,
504
+ "max_text_length": 8736,
505
+ "unique_texts": 5653
506
+ },
507
+ "documents_image_statistics": null,
508
+ "queries_text_statistics": {
509
+ "total_text_length": 3231,
510
+ "min_text_length": 24,
511
+ "average_text_length": 38.464285714285715,
512
+ "max_text_length": 64,
513
+ "unique_texts": 84
514
+ },
515
+ "queries_image_statistics": null,
516
+ "relevant_docs_statistics": {
517
+ "num_relevant_docs": 94,
518
+ "min_relevant_docs_per_query": 100,
519
+ "average_relevant_docs_per_query": 1.119047619047619,
520
+ "max_relevant_docs_per_query": 100,
521
+ "unique_relevant_docs": 8400
522
+ },
523
+ "top_ranked_statistics": {
524
+ "num_top_ranked": 8400,
525
+ "min_top_ranked_per_query": 100,
526
+ "average_top_ranked_per_query": 100.0,
527
+ "max_top_ranked_per_query": 100
528
+ }
465
529
  },
466
530
  "th": {
467
531
  "num_samples": 73671,
468
532
  "number_of_characters": 38321622,
469
- "num_documents": 72941,
470
- "min_document_length": 15,
471
- "average_document_length": 524.949685362142,
472
- "max_document_length": 12078,
473
- "unique_documents": 72941,
474
- "num_queries": 730,
475
- "min_query_length": 14,
476
- "average_query_length": 42.83150684931507,
477
- "max_query_length": 176,
478
- "unique_queries": 730,
479
- "none_queries": 0,
480
- "num_relevant_docs": 72941,
481
- "min_relevant_docs_per_query": 41,
482
- "average_relevant_docs_per_query": 1.632876712328767,
483
- "max_relevant_docs_per_query": 100,
484
- "unique_relevant_docs": 72941,
485
- "num_instructions": null,
486
- "min_instruction_length": null,
487
- "average_instruction_length": null,
488
- "max_instruction_length": null,
489
- "unique_instructions": null,
490
- "num_top_ranked": 730,
491
- "min_top_ranked_per_query": 41,
492
- "average_top_ranked_per_query": 99.91917808219178,
493
- "max_top_ranked_per_query": 100
533
+ "documents_text_statistics": {
534
+ "total_text_length": 38290355,
535
+ "min_text_length": 15,
536
+ "average_text_length": 524.949685362142,
537
+ "max_text_length": 12078,
538
+ "unique_texts": 57439
539
+ },
540
+ "documents_image_statistics": null,
541
+ "queries_text_statistics": {
542
+ "total_text_length": 31267,
543
+ "min_text_length": 14,
544
+ "average_text_length": 42.83150684931507,
545
+ "max_text_length": 176,
546
+ "unique_texts": 730
547
+ },
548
+ "queries_image_statistics": null,
549
+ "relevant_docs_statistics": {
550
+ "num_relevant_docs": 1192,
551
+ "min_relevant_docs_per_query": 41,
552
+ "average_relevant_docs_per_query": 1.632876712328767,
553
+ "max_relevant_docs_per_query": 100,
554
+ "unique_relevant_docs": 72941
555
+ },
556
+ "top_ranked_statistics": {
557
+ "num_top_ranked": 72941,
558
+ "min_top_ranked_per_query": 41,
559
+ "average_top_ranked_per_query": 99.91917808219178,
560
+ "max_top_ranked_per_query": 100
561
+ }
494
562
  },
495
563
  "yo": {
496
564
  "num_samples": 12019,
497
565
  "number_of_characters": 4939804,
498
- "num_documents": 11900,
499
- "min_document_length": 7,
500
- "average_document_length": 414.7326890756303,
501
- "max_document_length": 5793,
502
- "unique_documents": 11900,
503
- "num_queries": 119,
504
- "min_query_length": 25,
505
- "average_query_length": 37.6890756302521,
506
- "max_query_length": 56,
507
- "unique_queries": 119,
508
- "none_queries": 0,
509
- "num_relevant_docs": 11900,
510
- "min_relevant_docs_per_query": 100,
511
- "average_relevant_docs_per_query": 0.8823529411764706,
512
- "max_relevant_docs_per_query": 100,
513
- "unique_relevant_docs": 11900,
514
- "num_instructions": null,
515
- "min_instruction_length": null,
516
- "average_instruction_length": null,
517
- "max_instruction_length": null,
518
- "unique_instructions": null,
519
- "num_top_ranked": 119,
520
- "min_top_ranked_per_query": 100,
521
- "average_top_ranked_per_query": 100.0,
522
- "max_top_ranked_per_query": 100
566
+ "documents_text_statistics": {
567
+ "total_text_length": 4935319,
568
+ "min_text_length": 7,
569
+ "average_text_length": 414.7326890756303,
570
+ "max_text_length": 5793,
571
+ "unique_texts": 4596
572
+ },
573
+ "documents_image_statistics": null,
574
+ "queries_text_statistics": {
575
+ "total_text_length": 4485,
576
+ "min_text_length": 25,
577
+ "average_text_length": 37.6890756302521,
578
+ "max_text_length": 56,
579
+ "unique_texts": 119
580
+ },
581
+ "queries_image_statistics": null,
582
+ "relevant_docs_statistics": {
583
+ "num_relevant_docs": 105,
584
+ "min_relevant_docs_per_query": 100,
585
+ "average_relevant_docs_per_query": 0.8823529411764706,
586
+ "max_relevant_docs_per_query": 100,
587
+ "unique_relevant_docs": 11900
588
+ },
589
+ "top_ranked_statistics": {
590
+ "num_top_ranked": 11900,
591
+ "min_top_ranked_per_query": 100,
592
+ "average_top_ranked_per_query": 100.0,
593
+ "max_top_ranked_per_query": 100
594
+ }
523
595
  },
524
596
  "zh": {
525
597
  "num_samples": 39491,
526
598
  "number_of_characters": 6153487,
527
- "num_documents": 39100,
528
- "min_document_length": 7,
529
- "average_document_length": 157.26959079283887,
530
- "max_document_length": 2629,
531
- "unique_documents": 39100,
532
- "num_queries": 391,
533
- "min_query_length": 7,
534
- "average_query_length": 10.859335038363172,
535
- "max_query_length": 22,
536
- "unique_queries": 391,
537
- "none_queries": 0,
538
- "num_relevant_docs": 39100,
539
- "min_relevant_docs_per_query": 100,
540
- "average_relevant_docs_per_query": 1.4194373401534526,
541
- "max_relevant_docs_per_query": 100,
542
- "unique_relevant_docs": 39100,
543
- "num_instructions": null,
544
- "min_instruction_length": null,
545
- "average_instruction_length": null,
546
- "max_instruction_length": null,
547
- "unique_instructions": null,
548
- "num_top_ranked": 391,
549
- "min_top_ranked_per_query": 100,
550
- "average_top_ranked_per_query": 100.0,
551
- "max_top_ranked_per_query": 100
599
+ "documents_text_statistics": {
600
+ "total_text_length": 6149241,
601
+ "min_text_length": 7,
602
+ "average_text_length": 157.26959079283887,
603
+ "max_text_length": 2629,
604
+ "unique_texts": 32688
605
+ },
606
+ "documents_image_statistics": null,
607
+ "queries_text_statistics": {
608
+ "total_text_length": 4246,
609
+ "min_text_length": 7,
610
+ "average_text_length": 10.859335038363172,
611
+ "max_text_length": 22,
612
+ "unique_texts": 388
613
+ },
614
+ "queries_image_statistics": null,
615
+ "relevant_docs_statistics": {
616
+ "num_relevant_docs": 555,
617
+ "min_relevant_docs_per_query": 100,
618
+ "average_relevant_docs_per_query": 1.4194373401534526,
619
+ "max_relevant_docs_per_query": 100,
620
+ "unique_relevant_docs": 39100
621
+ },
622
+ "top_ranked_statistics": {
623
+ "num_top_ranked": 39100,
624
+ "min_top_ranked_per_query": 100,
625
+ "average_top_ranked_per_query": 100.0,
626
+ "max_top_ranked_per_query": 100
627
+ }
552
628
  }
553
629
  }
554
630
  }