mteb 2.0.4__py3-none-any.whl → 2.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. mteb/descriptive_stats/BitextMining/BUCC.json +70 -40
  2. mteb/descriptive_stats/Classification/DKHateClassification.json +40 -24
  3. mteb/descriptive_stats/Classification/FinancialPhrasebankClassification.json +23 -15
  4. mteb/descriptive_stats/Classification/ImdbClassification.json +40 -24
  5. mteb/descriptive_stats/Classification/KorHateClassification.json +23 -15
  6. mteb/descriptive_stats/Clustering/ArxivClusteringP2P.json +555 -550
  7. mteb/descriptive_stats/Clustering/ArxivClusteringP2P.v2.json +546 -541
  8. mteb/descriptive_stats/Clustering/ArxivClusteringS2S.json +555 -550
  9. mteb/descriptive_stats/Clustering/MLSUMClusteringP2P.json +2466 -2416
  10. mteb/descriptive_stats/Clustering/RedditClusteringP2P.json +1365 -1360
  11. mteb/descriptive_stats/Clustering/SNLClustering.json +378 -373
  12. mteb/descriptive_stats/Clustering/SwednClustering.json +28 -23
  13. mteb/descriptive_stats/Clustering/VGClustering.json +54 -49
  14. mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/WITT2IRetrieval.json +324 -204
  15. mteb/descriptive_stats/Image/Any2AnyRetrieval/MemotionI2TRetrieval.json +28 -18
  16. mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRAirbnbSyntheticRetrieval.json +334 -0
  17. mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRGitHubReadmeRetrieval.json +544 -0
  18. mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRTweetStockSyntheticsRetrieval.json +334 -0
  19. mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRWikimediaCommonsDocumentsRetrieval.json +634 -0
  20. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore2ESGReportsRetrieval.json +154 -0
  21. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore2EconomicsReportsRetrieval.json +154 -0
  22. mteb/descriptive_stats/Image/ImageClassification/Imagenet1k.json +6039 -3007
  23. mteb/descriptive_stats/Image/ZeroShotClassification/Imagenet1kZeroShot.json +3024 -3010
  24. mteb/descriptive_stats/Image/ZeroShotClassification/PatchCamelyonZeroShot.json +30 -16
  25. mteb/descriptive_stats/Reranking/MIRACLReranking.json +555 -479
  26. mteb/descriptive_stats/Reranking/MindSmallReranking.json +29 -25
  27. mteb/descriptive_stats/Retrieval/AlloprofRetrieval.json +25 -26
  28. mteb/descriptive_stats/Retrieval/Code1Retrieval.json +30 -0
  29. mteb/descriptive_stats/Retrieval/DanFEVER.json +25 -26
  30. mteb/descriptive_stats/Retrieval/EnglishFinance1Retrieval.json +30 -0
  31. mteb/descriptive_stats/Retrieval/EnglishFinance2Retrieval.json +30 -0
  32. mteb/descriptive_stats/Retrieval/EnglishFinance3Retrieval.json +30 -0
  33. mteb/descriptive_stats/Retrieval/EnglishFinance4Retrieval.json +30 -0
  34. mteb/descriptive_stats/Retrieval/EnglishHealthcare1Retrieval.json +30 -0
  35. mteb/descriptive_stats/Retrieval/French1Retrieval.json +30 -0
  36. mteb/descriptive_stats/Retrieval/FrenchLegal1Retrieval.json +30 -0
  37. mteb/descriptive_stats/Retrieval/German1Retrieval.json +30 -0
  38. mteb/descriptive_stats/Retrieval/GermanHealthcare1Retrieval.json +30 -0
  39. mteb/descriptive_stats/Retrieval/GermanLegal1Retrieval.json +30 -0
  40. mteb/descriptive_stats/Retrieval/JapaneseCode1Retrieval.json +30 -0
  41. mteb/descriptive_stats/Retrieval/JapaneseLegal1Retrieval.json +30 -0
  42. mteb/descriptive_stats/Retrieval/MIRACLRetrieval.json +475 -494
  43. mteb/descriptive_stats/Retrieval/MSMARCO-Fa.json +25 -26
  44. mteb/descriptive_stats/Retrieval/MSMARCO.json +25 -84
  45. mteb/descriptive_stats/Retrieval/Touche2020.json +25 -26
  46. mteb/descriptive_stats/Summarization/SummEval.json +27 -50
  47. mteb/descriptive_stats/Summarization/SummEvalFr.json +27 -50
  48. mteb/models/model_implementations/kalm_models.py +29 -0
  49. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  50. mteb/tasks/classification/eng/financial_phrasebank_classification.py +0 -3
  51. mteb/tasks/classification/kor/kor_hate_classification.py +0 -12
  52. mteb/tasks/clustering/swe/swedn_clustering.py +2 -2
  53. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  54. {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/METADATA +1 -1
  55. {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/RECORD +59 -40
  56. {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/WHEEL +0 -0
  57. {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/entry_points.txt +0 -0
  58. {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/licenses/LICENSE +0 -0
  59. {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/top_level.txt +0 -0
@@ -2,30 +2,34 @@
2
2
  "test": {
3
3
  "num_samples": 2367791,
4
4
  "number_of_characters": 162620316,
5
- "num_documents": 5277,
6
- "min_document_length": 11,
7
- "average_document_length": 65.06348303960584,
8
- "max_document_length": 176,
9
- "unique_documents": 5277,
10
- "num_queries": 2362514,
11
- "min_query_length": 11,
12
- "average_query_length": 68.68826004840606,
13
- "max_query_length": 251,
14
- "unique_queries": 2362514,
15
- "none_queries": 0,
16
- "num_relevant_docs": 97006943,
17
- "min_relevant_docs_per_query": 2,
18
- "average_relevant_docs_per_query": 1.8289660928993436,
19
- "max_relevant_docs_per_query": 295,
20
- "unique_relevant_docs": 5277,
21
- "num_instructions": null,
22
- "min_instruction_length": null,
23
- "average_instruction_length": null,
24
- "max_instruction_length": null,
25
- "unique_instructions": null,
26
- "num_top_ranked": 2362514,
27
- "min_top_ranked_per_query": 2,
28
- "average_top_ranked_per_query": 41.06168556038187,
29
- "max_top_ranked_per_query": 295
5
+ "documents_text_statistics": {
6
+ "total_text_length": 343340,
7
+ "min_text_length": 11,
8
+ "average_text_length": 65.06348303960584,
9
+ "max_text_length": 176,
10
+ "unique_texts": 5277
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 162276976,
15
+ "min_text_length": 11,
16
+ "average_text_length": 68.68826004840606,
17
+ "max_text_length": 251,
18
+ "unique_texts": 37162
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 4320958,
23
+ "min_relevant_docs_per_query": 2,
24
+ "average_relevant_docs_per_query": 1.8289660928993436,
25
+ "max_relevant_docs_per_query": 295,
26
+ "unique_relevant_docs": 5277
27
+ },
28
+ "top_ranked_statistics": {
29
+ "num_top_ranked": 97008807,
30
+ "min_top_ranked_per_query": 2,
31
+ "average_top_ranked_per_query": 41.06168556038187,
32
+ "max_top_ranked_per_query": 295
33
+ }
30
34
  }
31
35
  }
@@ -1,31 +1,30 @@
1
1
  {
2
2
  "test": {
3
3
  "num_samples": 4872,
4
- "number_of_characters": 9355954,
5
- "num_documents": 2556,
6
- "min_document_length": 9,
7
- "average_document_length": 3505.705399061033,
8
- "max_document_length": 47930,
9
- "unique_documents": 2556,
10
- "num_queries": 2316,
11
- "min_query_length": 8,
12
- "average_query_length": 170.71286701208982,
13
- "max_query_length": 2863,
14
- "unique_queries": 2316,
15
- "none_queries": 0,
16
- "num_relevant_docs": 2316,
17
- "min_relevant_docs_per_query": 1,
18
- "average_relevant_docs_per_query": 1.0,
19
- "max_relevant_docs_per_query": 1,
20
- "unique_relevant_docs": 988,
21
- "num_instructions": null,
22
- "min_instruction_length": null,
23
- "average_instruction_length": null,
24
- "max_instruction_length": null,
25
- "unique_instructions": null,
26
- "num_top_ranked": null,
27
- "min_top_ranked_per_query": null,
28
- "average_top_ranked_per_query": null,
29
- "max_top_ranked_per_query": null
4
+ "number_of_characters": 9352943,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 8957572,
7
+ "min_text_length": 8,
8
+ "average_text_length": 3504.527386541471,
9
+ "max_text_length": 47929,
10
+ "unique_texts": 2556
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 395371,
15
+ "min_text_length": 8,
16
+ "average_text_length": 170.71286701208982,
17
+ "max_text_length": 2863,
18
+ "unique_texts": 2316
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 2316,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.0,
25
+ "max_relevant_docs_per_query": 1,
26
+ "unique_relevant_docs": 988
27
+ },
28
+ "top_ranked_statistics": null
30
29
  }
31
30
  }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 29266,
4
+ "number_of_characters": 13467784,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 11954895,
7
+ "min_text_length": 9,
8
+ "average_text_length": 816.9818219093829,
9
+ "max_text_length": 10005,
10
+ "unique_texts": 14527
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 1512889,
15
+ "min_text_length": 42,
16
+ "average_text_length": 103.38884712635823,
17
+ "max_text_length": 281,
18
+ "unique_texts": 14600
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 14633,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.0,
25
+ "max_relevant_docs_per_query": 1,
26
+ "unique_relevant_docs": 14633
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -1,31 +1,30 @@
1
1
  {
2
2
  "train": {
3
3
  "num_samples": 8897,
4
- "number_of_characters": 1108138,
5
- "num_documents": 2524,
6
- "min_document_length": 28,
7
- "average_document_length": 312.1117274167987,
8
- "max_document_length": 1748,
9
- "unique_documents": 2524,
10
- "num_queries": 6373,
11
- "min_query_length": 11,
12
- "average_query_length": 50.26957476855484,
13
- "max_query_length": 188,
14
- "unique_queries": 6373,
15
- "none_queries": 0,
16
- "num_relevant_docs": 6382,
17
- "min_relevant_docs_per_query": 1,
18
- "average_relevant_docs_per_query": 0.48721167425074535,
19
- "max_relevant_docs_per_query": 3,
20
- "unique_relevant_docs": 2524,
21
- "num_instructions": null,
22
- "min_instruction_length": null,
23
- "average_instruction_length": null,
24
- "max_instruction_length": null,
25
- "unique_instructions": null,
26
- "num_top_ranked": null,
27
- "min_top_ranked_per_query": null,
28
- "average_top_ranked_per_query": null,
29
- "max_top_ranked_per_query": null
4
+ "number_of_characters": 1108127,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 787759,
7
+ "min_text_length": 28,
8
+ "average_text_length": 312.10736925515056,
9
+ "max_text_length": 1748,
10
+ "unique_texts": 2522
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 320368,
15
+ "min_text_length": 11,
16
+ "average_text_length": 50.26957476855484,
17
+ "max_text_length": 188,
18
+ "unique_texts": 6373
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 3105,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 0.48721167425074535,
25
+ "max_relevant_docs_per_query": 3,
26
+ "unique_relevant_docs": 2524
27
+ },
28
+ "top_ranked_statistics": null
30
29
  }
31
30
  }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 547,
4
+ "number_of_characters": 512504,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 494755,
7
+ "min_text_length": 251,
8
+ "average_text_length": 1779.6942446043165,
9
+ "max_text_length": 6830,
10
+ "unique_texts": 278
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 17749,
15
+ "min_text_length": 24,
16
+ "average_text_length": 65.9814126394052,
17
+ "max_text_length": 178,
18
+ "unique_texts": 269
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 278,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.033457249070632,
25
+ "max_relevant_docs_per_query": 8,
26
+ "unique_relevant_docs": 278
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 694,
4
+ "number_of_characters": 1042838,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 1009106,
7
+ "min_text_length": 330,
8
+ "average_text_length": 3881.1769230769232,
9
+ "max_text_length": 8508,
10
+ "unique_texts": 260
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 33732,
15
+ "min_text_length": 27,
16
+ "average_text_length": 77.72350230414746,
17
+ "max_text_length": 229,
18
+ "unique_texts": 434
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 434,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.0,
25
+ "max_relevant_docs_per_query": 1,
26
+ "unique_relevant_docs": 260
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 3972,
4
+ "number_of_characters": 2868994,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 2745174,
7
+ "min_text_length": 38,
8
+ "average_text_length": 1382.2628398791542,
9
+ "max_text_length": 9305,
10
+ "unique_texts": 1986
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 123820,
15
+ "min_text_length": 15,
16
+ "average_text_length": 62.34642497482377,
17
+ "max_text_length": 147,
18
+ "unique_texts": 1986
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 1986,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.0,
25
+ "max_relevant_docs_per_query": 1,
26
+ "unique_relevant_docs": 1986
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 86064,
4
+ "number_of_characters": 27281829,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 24664041,
7
+ "min_text_length": 1,
8
+ "average_text_length": 573.1558142777468,
9
+ "max_text_length": 16984,
10
+ "unique_texts": 42605
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 2617788,
15
+ "min_text_length": 9,
16
+ "average_text_length": 60.8335192414947,
17
+ "max_text_length": 489,
18
+ "unique_texts": 32605
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 43032,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.0,
25
+ "max_relevant_docs_per_query": 1,
26
+ "unique_relevant_docs": 43032
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 2120,
4
+ "number_of_characters": 3979257,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 3863515,
7
+ "min_text_length": 2876,
8
+ "average_text_length": 26282.414965986394,
9
+ "max_text_length": 70846,
10
+ "unique_texts": 147
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 115742,
15
+ "min_text_length": 11,
16
+ "average_text_length": 58.66294982260517,
17
+ "max_text_length": 194,
18
+ "unique_texts": 1973
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 1983,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.005068423720223,
25
+ "max_relevant_docs_per_query": 3,
26
+ "unique_relevant_docs": 147
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 7701,
4
+ "number_of_characters": 78399574,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 78029218,
7
+ "min_text_length": 76,
8
+ "average_text_length": 21765.472245467223,
9
+ "max_text_length": 40884,
10
+ "unique_texts": 3585
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 370356,
15
+ "min_text_length": 24,
16
+ "average_text_length": 89.9795918367347,
17
+ "max_text_length": 446,
18
+ "unique_texts": 4116
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 4117,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.000242954324587,
25
+ "max_relevant_docs_per_query": 2,
26
+ "unique_relevant_docs": 3585
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 4266,
4
+ "number_of_characters": 15390566,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 12950811,
7
+ "min_text_length": 825,
8
+ "average_text_length": 6071.641350210971,
9
+ "max_text_length": 41072,
10
+ "unique_texts": 2133
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 2439755,
15
+ "min_text_length": 18,
16
+ "average_text_length": 1143.8138771683075,
17
+ "max_text_length": 22983,
18
+ "unique_texts": 2133
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 2133,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.0,
25
+ "max_relevant_docs_per_query": 1,
26
+ "unique_relevant_docs": 2133
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 1978,
4
+ "number_of_characters": 613245,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 421921,
7
+ "min_text_length": 222,
8
+ "average_text_length": 845.5330661322645,
9
+ "max_text_length": 4667,
10
+ "unique_texts": 499
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 191324,
15
+ "min_text_length": 32,
16
+ "average_text_length": 129.3603786342123,
17
+ "max_text_length": 515,
18
+ "unique_texts": 1479
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 1482,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.002028397565923,
25
+ "max_relevant_docs_per_query": 2,
26
+ "unique_relevant_docs": 499
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 930,
4
+ "number_of_characters": 574933,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 355258,
7
+ "min_text_length": 357,
8
+ "average_text_length": 763.9956989247312,
9
+ "max_text_length": 2450,
10
+ "unique_texts": 465
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 219675,
15
+ "min_text_length": 155,
16
+ "average_text_length": 472.4193548387097,
17
+ "max_text_length": 2000,
18
+ "unique_texts": 465
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 465,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.0,
25
+ "max_relevant_docs_per_query": 1,
26
+ "unique_relevant_docs": 465
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 8968,
4
+ "number_of_characters": 2159796,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 1160348,
7
+ "min_text_length": 99,
8
+ "average_text_length": 240.68616469612112,
9
+ "max_text_length": 490,
10
+ "unique_texts": 4821
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 999448,
15
+ "min_text_length": 107,
16
+ "average_text_length": 241.0050639016156,
17
+ "max_text_length": 489,
18
+ "unique_texts": 4147
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 4976,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.199903544731131,
25
+ "max_relevant_docs_per_query": 6,
26
+ "unique_relevant_docs": 4821
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 4485,
4
+ "number_of_characters": 176136,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 92062,
7
+ "min_text_length": 10,
8
+ "average_text_length": 40.64547461368653,
9
+ "max_text_length": 232,
10
+ "unique_texts": 2265
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 84074,
15
+ "min_text_length": 10,
16
+ "average_text_length": 37.87117117117117,
17
+ "max_text_length": 188,
18
+ "unique_texts": 2220
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 2302,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.036936936936937,
25
+ "max_relevant_docs_per_query": 4,
26
+ "unique_relevant_docs": 2265
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 14541,
4
+ "number_of_characters": 94819741,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 94589279,
7
+ "min_text_length": 20,
8
+ "average_text_length": 12980.551530122135,
9
+ "max_text_length": 2772073,
10
+ "unique_texts": 7287
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 230462,
15
+ "min_text_length": 10,
16
+ "average_text_length": 31.770333609043288,
17
+ "max_text_length": 154,
18
+ "unique_texts": 7254
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 7287,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.0045492142266337,
25
+ "max_relevant_docs_per_query": 4,
26
+ "unique_relevant_docs": 7287
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }