mteb 2.0.4__py3-none-any.whl → 2.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/descriptive_stats/BitextMining/BUCC.json +70 -40
- mteb/descriptive_stats/Classification/DKHateClassification.json +40 -24
- mteb/descriptive_stats/Classification/FinancialPhrasebankClassification.json +23 -15
- mteb/descriptive_stats/Classification/ImdbClassification.json +40 -24
- mteb/descriptive_stats/Classification/KorHateClassification.json +23 -15
- mteb/descriptive_stats/Clustering/ArxivClusteringP2P.json +555 -550
- mteb/descriptive_stats/Clustering/ArxivClusteringP2P.v2.json +546 -541
- mteb/descriptive_stats/Clustering/ArxivClusteringS2S.json +555 -550
- mteb/descriptive_stats/Clustering/MLSUMClusteringP2P.json +2466 -2416
- mteb/descriptive_stats/Clustering/RedditClusteringP2P.json +1365 -1360
- mteb/descriptive_stats/Clustering/SNLClustering.json +378 -373
- mteb/descriptive_stats/Clustering/SwednClustering.json +28 -23
- mteb/descriptive_stats/Clustering/VGClustering.json +54 -49
- mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/WITT2IRetrieval.json +324 -204
- mteb/descriptive_stats/Image/Any2AnyRetrieval/MemotionI2TRetrieval.json +28 -18
- mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRAirbnbSyntheticRetrieval.json +334 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRGitHubReadmeRetrieval.json +544 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRTweetStockSyntheticsRetrieval.json +334 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRWikimediaCommonsDocumentsRetrieval.json +634 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore2ESGReportsRetrieval.json +154 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore2EconomicsReportsRetrieval.json +154 -0
- mteb/descriptive_stats/Image/ImageClassification/Imagenet1k.json +6039 -3007
- mteb/descriptive_stats/Image/ZeroShotClassification/Imagenet1kZeroShot.json +3024 -3010
- mteb/descriptive_stats/Image/ZeroShotClassification/PatchCamelyonZeroShot.json +30 -16
- mteb/descriptive_stats/Reranking/MIRACLReranking.json +555 -479
- mteb/descriptive_stats/Reranking/MindSmallReranking.json +29 -25
- mteb/descriptive_stats/Retrieval/AlloprofRetrieval.json +25 -26
- mteb/descriptive_stats/Retrieval/Code1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/DanFEVER.json +25 -26
- mteb/descriptive_stats/Retrieval/EnglishFinance1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EnglishFinance2Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EnglishFinance3Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EnglishFinance4Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EnglishHealthcare1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/French1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/FrenchLegal1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/German1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/GermanHealthcare1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/GermanLegal1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/JapaneseCode1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/JapaneseLegal1Retrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/MIRACLRetrieval.json +475 -494
- mteb/descriptive_stats/Retrieval/MSMARCO-Fa.json +25 -26
- mteb/descriptive_stats/Retrieval/MSMARCO.json +25 -84
- mteb/descriptive_stats/Retrieval/Touche2020.json +25 -26
- mteb/descriptive_stats/Summarization/SummEval.json +27 -50
- mteb/descriptive_stats/Summarization/SummEvalFr.json +27 -50
- mteb/models/model_implementations/kalm_models.py +29 -0
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
- mteb/tasks/classification/eng/financial_phrasebank_classification.py +0 -3
- mteb/tasks/classification/kor/kor_hate_classification.py +0 -12
- mteb/tasks/clustering/swe/swedn_clustering.py +2 -2
- mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
- {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/METADATA +1 -1
- {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/RECORD +59 -40
- {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/WHEEL +0 -0
- {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/entry_points.txt +0 -0
- {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/top_level.txt +0 -0
|
@@ -2,30 +2,34 @@
|
|
|
2
2
|
"test": {
|
|
3
3
|
"num_samples": 2367791,
|
|
4
4
|
"number_of_characters": 162620316,
|
|
5
|
-
"
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
"
|
|
13
|
-
"
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
"
|
|
21
|
-
"
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
"
|
|
29
|
-
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 343340,
|
|
7
|
+
"min_text_length": 11,
|
|
8
|
+
"average_text_length": 65.06348303960584,
|
|
9
|
+
"max_text_length": 176,
|
|
10
|
+
"unique_texts": 5277
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 162276976,
|
|
15
|
+
"min_text_length": 11,
|
|
16
|
+
"average_text_length": 68.68826004840606,
|
|
17
|
+
"max_text_length": 251,
|
|
18
|
+
"unique_texts": 37162
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 4320958,
|
|
23
|
+
"min_relevant_docs_per_query": 2,
|
|
24
|
+
"average_relevant_docs_per_query": 1.8289660928993436,
|
|
25
|
+
"max_relevant_docs_per_query": 295,
|
|
26
|
+
"unique_relevant_docs": 5277
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": {
|
|
29
|
+
"num_top_ranked": 97008807,
|
|
30
|
+
"min_top_ranked_per_query": 2,
|
|
31
|
+
"average_top_ranked_per_query": 41.06168556038187,
|
|
32
|
+
"max_top_ranked_per_query": 295
|
|
33
|
+
}
|
|
30
34
|
}
|
|
31
35
|
}
|
|
@@ -1,31 +1,30 @@
|
|
|
1
1
|
{
|
|
2
2
|
"test": {
|
|
3
3
|
"num_samples": 4872,
|
|
4
|
-
"number_of_characters":
|
|
5
|
-
"
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
"
|
|
13
|
-
"
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
"
|
|
21
|
-
"
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
"
|
|
29
|
-
"max_top_ranked_per_query": null
|
|
4
|
+
"number_of_characters": 9352943,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 8957572,
|
|
7
|
+
"min_text_length": 8,
|
|
8
|
+
"average_text_length": 3504.527386541471,
|
|
9
|
+
"max_text_length": 47929,
|
|
10
|
+
"unique_texts": 2556
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 395371,
|
|
15
|
+
"min_text_length": 8,
|
|
16
|
+
"average_text_length": 170.71286701208982,
|
|
17
|
+
"max_text_length": 2863,
|
|
18
|
+
"unique_texts": 2316
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 2316,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 1.0,
|
|
25
|
+
"max_relevant_docs_per_query": 1,
|
|
26
|
+
"unique_relevant_docs": 988
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": null
|
|
30
29
|
}
|
|
31
30
|
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 29266,
|
|
4
|
+
"number_of_characters": 13467784,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 11954895,
|
|
7
|
+
"min_text_length": 9,
|
|
8
|
+
"average_text_length": 816.9818219093829,
|
|
9
|
+
"max_text_length": 10005,
|
|
10
|
+
"unique_texts": 14527
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 1512889,
|
|
15
|
+
"min_text_length": 42,
|
|
16
|
+
"average_text_length": 103.38884712635823,
|
|
17
|
+
"max_text_length": 281,
|
|
18
|
+
"unique_texts": 14600
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 14633,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 1.0,
|
|
25
|
+
"max_relevant_docs_per_query": 1,
|
|
26
|
+
"unique_relevant_docs": 14633
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": null
|
|
29
|
+
}
|
|
30
|
+
}
|
|
@@ -1,31 +1,30 @@
|
|
|
1
1
|
{
|
|
2
2
|
"train": {
|
|
3
3
|
"num_samples": 8897,
|
|
4
|
-
"number_of_characters":
|
|
5
|
-
"
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
"
|
|
13
|
-
"
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
"
|
|
21
|
-
"
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
"
|
|
29
|
-
"max_top_ranked_per_query": null
|
|
4
|
+
"number_of_characters": 1108127,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 787759,
|
|
7
|
+
"min_text_length": 28,
|
|
8
|
+
"average_text_length": 312.10736925515056,
|
|
9
|
+
"max_text_length": 1748,
|
|
10
|
+
"unique_texts": 2522
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 320368,
|
|
15
|
+
"min_text_length": 11,
|
|
16
|
+
"average_text_length": 50.26957476855484,
|
|
17
|
+
"max_text_length": 188,
|
|
18
|
+
"unique_texts": 6373
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 3105,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 0.48721167425074535,
|
|
25
|
+
"max_relevant_docs_per_query": 3,
|
|
26
|
+
"unique_relevant_docs": 2524
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": null
|
|
30
29
|
}
|
|
31
30
|
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 547,
|
|
4
|
+
"number_of_characters": 512504,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 494755,
|
|
7
|
+
"min_text_length": 251,
|
|
8
|
+
"average_text_length": 1779.6942446043165,
|
|
9
|
+
"max_text_length": 6830,
|
|
10
|
+
"unique_texts": 278
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 17749,
|
|
15
|
+
"min_text_length": 24,
|
|
16
|
+
"average_text_length": 65.9814126394052,
|
|
17
|
+
"max_text_length": 178,
|
|
18
|
+
"unique_texts": 269
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 278,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 1.033457249070632,
|
|
25
|
+
"max_relevant_docs_per_query": 8,
|
|
26
|
+
"unique_relevant_docs": 278
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": null
|
|
29
|
+
}
|
|
30
|
+
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 694,
|
|
4
|
+
"number_of_characters": 1042838,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 1009106,
|
|
7
|
+
"min_text_length": 330,
|
|
8
|
+
"average_text_length": 3881.1769230769232,
|
|
9
|
+
"max_text_length": 8508,
|
|
10
|
+
"unique_texts": 260
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 33732,
|
|
15
|
+
"min_text_length": 27,
|
|
16
|
+
"average_text_length": 77.72350230414746,
|
|
17
|
+
"max_text_length": 229,
|
|
18
|
+
"unique_texts": 434
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 434,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 1.0,
|
|
25
|
+
"max_relevant_docs_per_query": 1,
|
|
26
|
+
"unique_relevant_docs": 260
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": null
|
|
29
|
+
}
|
|
30
|
+
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 3972,
|
|
4
|
+
"number_of_characters": 2868994,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 2745174,
|
|
7
|
+
"min_text_length": 38,
|
|
8
|
+
"average_text_length": 1382.2628398791542,
|
|
9
|
+
"max_text_length": 9305,
|
|
10
|
+
"unique_texts": 1986
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 123820,
|
|
15
|
+
"min_text_length": 15,
|
|
16
|
+
"average_text_length": 62.34642497482377,
|
|
17
|
+
"max_text_length": 147,
|
|
18
|
+
"unique_texts": 1986
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 1986,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 1.0,
|
|
25
|
+
"max_relevant_docs_per_query": 1,
|
|
26
|
+
"unique_relevant_docs": 1986
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": null
|
|
29
|
+
}
|
|
30
|
+
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 86064,
|
|
4
|
+
"number_of_characters": 27281829,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 24664041,
|
|
7
|
+
"min_text_length": 1,
|
|
8
|
+
"average_text_length": 573.1558142777468,
|
|
9
|
+
"max_text_length": 16984,
|
|
10
|
+
"unique_texts": 42605
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 2617788,
|
|
15
|
+
"min_text_length": 9,
|
|
16
|
+
"average_text_length": 60.8335192414947,
|
|
17
|
+
"max_text_length": 489,
|
|
18
|
+
"unique_texts": 32605
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 43032,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 1.0,
|
|
25
|
+
"max_relevant_docs_per_query": 1,
|
|
26
|
+
"unique_relevant_docs": 43032
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": null
|
|
29
|
+
}
|
|
30
|
+
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 2120,
|
|
4
|
+
"number_of_characters": 3979257,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 3863515,
|
|
7
|
+
"min_text_length": 2876,
|
|
8
|
+
"average_text_length": 26282.414965986394,
|
|
9
|
+
"max_text_length": 70846,
|
|
10
|
+
"unique_texts": 147
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 115742,
|
|
15
|
+
"min_text_length": 11,
|
|
16
|
+
"average_text_length": 58.66294982260517,
|
|
17
|
+
"max_text_length": 194,
|
|
18
|
+
"unique_texts": 1973
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 1983,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 1.005068423720223,
|
|
25
|
+
"max_relevant_docs_per_query": 3,
|
|
26
|
+
"unique_relevant_docs": 147
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": null
|
|
29
|
+
}
|
|
30
|
+
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 7701,
|
|
4
|
+
"number_of_characters": 78399574,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 78029218,
|
|
7
|
+
"min_text_length": 76,
|
|
8
|
+
"average_text_length": 21765.472245467223,
|
|
9
|
+
"max_text_length": 40884,
|
|
10
|
+
"unique_texts": 3585
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 370356,
|
|
15
|
+
"min_text_length": 24,
|
|
16
|
+
"average_text_length": 89.9795918367347,
|
|
17
|
+
"max_text_length": 446,
|
|
18
|
+
"unique_texts": 4116
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 4117,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 1.000242954324587,
|
|
25
|
+
"max_relevant_docs_per_query": 2,
|
|
26
|
+
"unique_relevant_docs": 3585
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": null
|
|
29
|
+
}
|
|
30
|
+
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 4266,
|
|
4
|
+
"number_of_characters": 15390566,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 12950811,
|
|
7
|
+
"min_text_length": 825,
|
|
8
|
+
"average_text_length": 6071.641350210971,
|
|
9
|
+
"max_text_length": 41072,
|
|
10
|
+
"unique_texts": 2133
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 2439755,
|
|
15
|
+
"min_text_length": 18,
|
|
16
|
+
"average_text_length": 1143.8138771683075,
|
|
17
|
+
"max_text_length": 22983,
|
|
18
|
+
"unique_texts": 2133
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 2133,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 1.0,
|
|
25
|
+
"max_relevant_docs_per_query": 1,
|
|
26
|
+
"unique_relevant_docs": 2133
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": null
|
|
29
|
+
}
|
|
30
|
+
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 1978,
|
|
4
|
+
"number_of_characters": 613245,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 421921,
|
|
7
|
+
"min_text_length": 222,
|
|
8
|
+
"average_text_length": 845.5330661322645,
|
|
9
|
+
"max_text_length": 4667,
|
|
10
|
+
"unique_texts": 499
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 191324,
|
|
15
|
+
"min_text_length": 32,
|
|
16
|
+
"average_text_length": 129.3603786342123,
|
|
17
|
+
"max_text_length": 515,
|
|
18
|
+
"unique_texts": 1479
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 1482,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 1.002028397565923,
|
|
25
|
+
"max_relevant_docs_per_query": 2,
|
|
26
|
+
"unique_relevant_docs": 499
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": null
|
|
29
|
+
}
|
|
30
|
+
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 930,
|
|
4
|
+
"number_of_characters": 574933,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 355258,
|
|
7
|
+
"min_text_length": 357,
|
|
8
|
+
"average_text_length": 763.9956989247312,
|
|
9
|
+
"max_text_length": 2450,
|
|
10
|
+
"unique_texts": 465
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 219675,
|
|
15
|
+
"min_text_length": 155,
|
|
16
|
+
"average_text_length": 472.4193548387097,
|
|
17
|
+
"max_text_length": 2000,
|
|
18
|
+
"unique_texts": 465
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 465,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 1.0,
|
|
25
|
+
"max_relevant_docs_per_query": 1,
|
|
26
|
+
"unique_relevant_docs": 465
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": null
|
|
29
|
+
}
|
|
30
|
+
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 8968,
|
|
4
|
+
"number_of_characters": 2159796,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 1160348,
|
|
7
|
+
"min_text_length": 99,
|
|
8
|
+
"average_text_length": 240.68616469612112,
|
|
9
|
+
"max_text_length": 490,
|
|
10
|
+
"unique_texts": 4821
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 999448,
|
|
15
|
+
"min_text_length": 107,
|
|
16
|
+
"average_text_length": 241.0050639016156,
|
|
17
|
+
"max_text_length": 489,
|
|
18
|
+
"unique_texts": 4147
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 4976,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 1.199903544731131,
|
|
25
|
+
"max_relevant_docs_per_query": 6,
|
|
26
|
+
"unique_relevant_docs": 4821
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": null
|
|
29
|
+
}
|
|
30
|
+
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 4485,
|
|
4
|
+
"number_of_characters": 176136,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 92062,
|
|
7
|
+
"min_text_length": 10,
|
|
8
|
+
"average_text_length": 40.64547461368653,
|
|
9
|
+
"max_text_length": 232,
|
|
10
|
+
"unique_texts": 2265
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 84074,
|
|
15
|
+
"min_text_length": 10,
|
|
16
|
+
"average_text_length": 37.87117117117117,
|
|
17
|
+
"max_text_length": 188,
|
|
18
|
+
"unique_texts": 2220
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 2302,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 1.036936936936937,
|
|
25
|
+
"max_relevant_docs_per_query": 4,
|
|
26
|
+
"unique_relevant_docs": 2265
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": null
|
|
29
|
+
}
|
|
30
|
+
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 14541,
|
|
4
|
+
"number_of_characters": 94819741,
|
|
5
|
+
"documents_text_statistics": {
|
|
6
|
+
"total_text_length": 94589279,
|
|
7
|
+
"min_text_length": 20,
|
|
8
|
+
"average_text_length": 12980.551530122135,
|
|
9
|
+
"max_text_length": 2772073,
|
|
10
|
+
"unique_texts": 7287
|
|
11
|
+
},
|
|
12
|
+
"documents_image_statistics": null,
|
|
13
|
+
"queries_text_statistics": {
|
|
14
|
+
"total_text_length": 230462,
|
|
15
|
+
"min_text_length": 10,
|
|
16
|
+
"average_text_length": 31.770333609043288,
|
|
17
|
+
"max_text_length": 154,
|
|
18
|
+
"unique_texts": 7254
|
|
19
|
+
},
|
|
20
|
+
"queries_image_statistics": null,
|
|
21
|
+
"relevant_docs_statistics": {
|
|
22
|
+
"num_relevant_docs": 7287,
|
|
23
|
+
"min_relevant_docs_per_query": 1,
|
|
24
|
+
"average_relevant_docs_per_query": 1.0045492142266337,
|
|
25
|
+
"max_relevant_docs_per_query": 4,
|
|
26
|
+
"unique_relevant_docs": 7287
|
|
27
|
+
},
|
|
28
|
+
"top_ranked_statistics": null
|
|
29
|
+
}
|
|
30
|
+
}
|