mteb 2.0.4__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. mteb/__init__.py +10 -1
  2. mteb/benchmarks/benchmarks/__init__.py +2 -0
  3. mteb/benchmarks/benchmarks/benchmarks.py +75 -0
  4. mteb/descriptive_stats/BitextMining/BUCC.json +70 -40
  5. mteb/descriptive_stats/Classification/DKHateClassification.json +40 -24
  6. mteb/descriptive_stats/Classification/DutchColaClassification.json +54 -0
  7. mteb/descriptive_stats/Classification/DutchGovernmentBiasClassification.json +54 -0
  8. mteb/descriptive_stats/Classification/DutchNewsArticlesClassification.json +90 -0
  9. mteb/descriptive_stats/Classification/DutchSarcasticHeadlinesClassification.json +54 -0
  10. mteb/descriptive_stats/Classification/FinancialPhrasebankClassification.json +23 -15
  11. mteb/descriptive_stats/Classification/IconclassClassification.json +96 -0
  12. mteb/descriptive_stats/Classification/ImdbClassification.json +40 -24
  13. mteb/descriptive_stats/Classification/KorHateClassification.json +23 -15
  14. mteb/descriptive_stats/Classification/OpenTenderClassification.json +222 -0
  15. mteb/descriptive_stats/Classification/VaccinChatNLClassification.json +1068 -0
  16. mteb/descriptive_stats/Clustering/ArxivClusteringP2P.json +555 -550
  17. mteb/descriptive_stats/Clustering/ArxivClusteringP2P.v2.json +546 -541
  18. mteb/descriptive_stats/Clustering/ArxivClusteringS2S.json +555 -550
  19. mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringP2P.json +45 -0
  20. mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringS2S.json +45 -0
  21. mteb/descriptive_stats/Clustering/IconclassClusteringS2S.json +48 -0
  22. mteb/descriptive_stats/Clustering/MLSUMClusteringP2P.json +2466 -2416
  23. mteb/descriptive_stats/Clustering/OpenTenderClusteringP2P.json +111 -0
  24. mteb/descriptive_stats/Clustering/OpenTenderClusteringS2S.json +111 -0
  25. mteb/descriptive_stats/Clustering/RedditClusteringP2P.json +1365 -1360
  26. mteb/descriptive_stats/Clustering/SNLClustering.json +378 -373
  27. mteb/descriptive_stats/Clustering/SwednClustering.json +28 -23
  28. mteb/descriptive_stats/Clustering/VABBClusteringP2P.json +60 -0
  29. mteb/descriptive_stats/Clustering/VABBClusteringS2S.json +60 -0
  30. mteb/descriptive_stats/Clustering/VGClustering.json +54 -49
  31. mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/WITT2IRetrieval.json +324 -204
  32. mteb/descriptive_stats/Image/Any2AnyRetrieval/MemotionI2TRetrieval.json +28 -18
  33. mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRAirbnbSyntheticRetrieval.json +334 -0
  34. mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRGitHubReadmeRetrieval.json +544 -0
  35. mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRTweetStockSyntheticsRetrieval.json +334 -0
  36. mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRWikimediaCommonsDocumentsRetrieval.json +634 -0
  37. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore2ESGReportsRetrieval.json +154 -0
  38. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore2EconomicsReportsRetrieval.json +154 -0
  39. mteb/descriptive_stats/Image/ImageClassification/Imagenet1k.json +6039 -3007
  40. mteb/descriptive_stats/Image/ZeroShotClassification/Imagenet1kZeroShot.json +3024 -3010
  41. mteb/descriptive_stats/Image/ZeroShotClassification/PatchCamelyonZeroShot.json +30 -16
  42. mteb/descriptive_stats/MultilabelClassification/CovidDisinformationNLMultiLabelClassification.json +84 -0
  43. mteb/descriptive_stats/MultilabelClassification/VABBMultiLabelClassification.json +156 -0
  44. mteb/descriptive_stats/PairClassification/SICKNLPairClassification.json +35 -0
  45. mteb/descriptive_stats/PairClassification/XLWICNLPairClassification.json +35 -0
  46. mteb/descriptive_stats/Reranking/MIRACLReranking.json +555 -479
  47. mteb/descriptive_stats/Reranking/MindSmallReranking.json +29 -25
  48. mteb/descriptive_stats/Retrieval/AlloprofRetrieval.json +25 -26
  49. mteb/descriptive_stats/Retrieval/Code1Retrieval.json +30 -0
  50. mteb/descriptive_stats/Retrieval/DanFEVER.json +25 -26
  51. mteb/descriptive_stats/Retrieval/DutchNewsArticlesRetrieval.json +30 -0
  52. mteb/descriptive_stats/Retrieval/EnglishFinance1Retrieval.json +30 -0
  53. mteb/descriptive_stats/Retrieval/EnglishFinance2Retrieval.json +30 -0
  54. mteb/descriptive_stats/Retrieval/EnglishFinance3Retrieval.json +30 -0
  55. mteb/descriptive_stats/Retrieval/EnglishFinance4Retrieval.json +30 -0
  56. mteb/descriptive_stats/Retrieval/EnglishHealthcare1Retrieval.json +30 -0
  57. mteb/descriptive_stats/Retrieval/French1Retrieval.json +30 -0
  58. mteb/descriptive_stats/Retrieval/FrenchLegal1Retrieval.json +30 -0
  59. mteb/descriptive_stats/Retrieval/German1Retrieval.json +30 -0
  60. mteb/descriptive_stats/Retrieval/GermanHealthcare1Retrieval.json +30 -0
  61. mteb/descriptive_stats/Retrieval/GermanLegal1Retrieval.json +30 -0
  62. mteb/descriptive_stats/Retrieval/JapaneseCode1Retrieval.json +30 -0
  63. mteb/descriptive_stats/Retrieval/JapaneseLegal1Retrieval.json +30 -0
  64. mteb/descriptive_stats/Retrieval/LegalQANLRetrieval.json +30 -0
  65. mteb/descriptive_stats/Retrieval/MIRACLRetrieval.json +475 -494
  66. mteb/descriptive_stats/Retrieval/MSMARCO-Fa.json +25 -26
  67. mteb/descriptive_stats/Retrieval/MSMARCO.json +25 -84
  68. mteb/descriptive_stats/Retrieval/OpenTenderRetrieval.json +30 -0
  69. mteb/descriptive_stats/Retrieval/Touche2020.json +25 -26
  70. mteb/descriptive_stats/Retrieval/VABBRetrieval.json +30 -0
  71. mteb/descriptive_stats/Retrieval/bBSARDNLRetrieval.json +30 -0
  72. mteb/descriptive_stats/STS/SICK-NL-STS.json +28 -0
  73. mteb/descriptive_stats/Summarization/SummEval.json +27 -50
  74. mteb/descriptive_stats/Summarization/SummEvalFr.json +27 -50
  75. mteb/models/model_implementations/kalm_models.py +29 -0
  76. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  77. mteb/tasks/classification/eng/financial_phrasebank_classification.py +0 -3
  78. mteb/tasks/classification/kor/kor_hate_classification.py +0 -12
  79. mteb/tasks/classification/nld/__init__.py +16 -0
  80. mteb/tasks/classification/nld/dutch_cola_classification.py +38 -0
  81. mteb/tasks/classification/nld/dutch_government_bias_classification.py +37 -0
  82. mteb/tasks/classification/nld/dutch_news_articles_classification.py +30 -0
  83. mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +36 -0
  84. mteb/tasks/classification/nld/iconclass_classification.py +41 -0
  85. mteb/tasks/classification/nld/open_tender_classification.py +38 -0
  86. mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +46 -0
  87. mteb/tasks/clustering/__init__.py +1 -0
  88. mteb/tasks/clustering/nld/__init__.py +17 -0
  89. mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +37 -0
  90. mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +37 -0
  91. mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +47 -0
  92. mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +51 -0
  93. mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +41 -0
  94. mteb/tasks/clustering/nld/vabb_clustering_p2p.py +51 -0
  95. mteb/tasks/clustering/nld/vabb_clustering_s2s.py +51 -0
  96. mteb/tasks/clustering/swe/swedn_clustering.py +2 -2
  97. mteb/tasks/multilabel_classification/__init__.py +1 -0
  98. mteb/tasks/multilabel_classification/nld/__init__.py +9 -0
  99. mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +88 -0
  100. mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +44 -0
  101. mteb/tasks/pair_classification/__init__.py +1 -0
  102. mteb/tasks/pair_classification/nld/__init__.py +7 -0
  103. mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +36 -0
  104. mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +41 -0
  105. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  106. mteb/tasks/retrieval/nld/__init__.py +10 -0
  107. mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +41 -0
  108. mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +30 -0
  109. mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +39 -0
  110. mteb/tasks/retrieval/nld/open_tender_retrieval.py +38 -0
  111. mteb/tasks/retrieval/nld/vabb_retrieval.py +41 -0
  112. mteb/tasks/sts/__init__.py +1 -0
  113. mteb/tasks/sts/nld/__init__.py +5 -0
  114. mteb/tasks/sts/nld/sick_nl_sts.py +41 -0
  115. {mteb-2.0.4.dist-info → mteb-2.1.0.dist-info}/METADATA +2 -204
  116. {mteb-2.0.4.dist-info → mteb-2.1.0.dist-info}/RECORD +120 -49
  117. {mteb-2.0.4.dist-info → mteb-2.1.0.dist-info}/WHEEL +0 -0
  118. {mteb-2.0.4.dist-info → mteb-2.1.0.dist-info}/entry_points.txt +0 -0
  119. {mteb-2.0.4.dist-info → mteb-2.1.0.dist-info}/licenses/LICENSE +0 -0
  120. {mteb-2.0.4.dist-info → mteb-2.1.0.dist-info}/top_level.txt +0 -0
@@ -2,30 +2,34 @@
2
2
  "test": {
3
3
  "num_samples": 2367791,
4
4
  "number_of_characters": 162620316,
5
- "num_documents": 5277,
6
- "min_document_length": 11,
7
- "average_document_length": 65.06348303960584,
8
- "max_document_length": 176,
9
- "unique_documents": 5277,
10
- "num_queries": 2362514,
11
- "min_query_length": 11,
12
- "average_query_length": 68.68826004840606,
13
- "max_query_length": 251,
14
- "unique_queries": 2362514,
15
- "none_queries": 0,
16
- "num_relevant_docs": 97006943,
17
- "min_relevant_docs_per_query": 2,
18
- "average_relevant_docs_per_query": 1.8289660928993436,
19
- "max_relevant_docs_per_query": 295,
20
- "unique_relevant_docs": 5277,
21
- "num_instructions": null,
22
- "min_instruction_length": null,
23
- "average_instruction_length": null,
24
- "max_instruction_length": null,
25
- "unique_instructions": null,
26
- "num_top_ranked": 2362514,
27
- "min_top_ranked_per_query": 2,
28
- "average_top_ranked_per_query": 41.06168556038187,
29
- "max_top_ranked_per_query": 295
5
+ "documents_text_statistics": {
6
+ "total_text_length": 343340,
7
+ "min_text_length": 11,
8
+ "average_text_length": 65.06348303960584,
9
+ "max_text_length": 176,
10
+ "unique_texts": 5277
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 162276976,
15
+ "min_text_length": 11,
16
+ "average_text_length": 68.68826004840606,
17
+ "max_text_length": 251,
18
+ "unique_texts": 37162
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 4320958,
23
+ "min_relevant_docs_per_query": 2,
24
+ "average_relevant_docs_per_query": 1.8289660928993436,
25
+ "max_relevant_docs_per_query": 295,
26
+ "unique_relevant_docs": 5277
27
+ },
28
+ "top_ranked_statistics": {
29
+ "num_top_ranked": 97008807,
30
+ "min_top_ranked_per_query": 2,
31
+ "average_top_ranked_per_query": 41.06168556038187,
32
+ "max_top_ranked_per_query": 295
33
+ }
30
34
  }
31
35
  }
@@ -1,31 +1,30 @@
1
1
  {
2
2
  "test": {
3
3
  "num_samples": 4872,
4
- "number_of_characters": 9355954,
5
- "num_documents": 2556,
6
- "min_document_length": 9,
7
- "average_document_length": 3505.705399061033,
8
- "max_document_length": 47930,
9
- "unique_documents": 2556,
10
- "num_queries": 2316,
11
- "min_query_length": 8,
12
- "average_query_length": 170.71286701208982,
13
- "max_query_length": 2863,
14
- "unique_queries": 2316,
15
- "none_queries": 0,
16
- "num_relevant_docs": 2316,
17
- "min_relevant_docs_per_query": 1,
18
- "average_relevant_docs_per_query": 1.0,
19
- "max_relevant_docs_per_query": 1,
20
- "unique_relevant_docs": 988,
21
- "num_instructions": null,
22
- "min_instruction_length": null,
23
- "average_instruction_length": null,
24
- "max_instruction_length": null,
25
- "unique_instructions": null,
26
- "num_top_ranked": null,
27
- "min_top_ranked_per_query": null,
28
- "average_top_ranked_per_query": null,
29
- "max_top_ranked_per_query": null
4
+ "number_of_characters": 9352943,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 8957572,
7
+ "min_text_length": 8,
8
+ "average_text_length": 3504.527386541471,
9
+ "max_text_length": 47929,
10
+ "unique_texts": 2556
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 395371,
15
+ "min_text_length": 8,
16
+ "average_text_length": 170.71286701208982,
17
+ "max_text_length": 2863,
18
+ "unique_texts": 2316
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 2316,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.0,
25
+ "max_relevant_docs_per_query": 1,
26
+ "unique_relevant_docs": 988
27
+ },
28
+ "top_ranked_statistics": null
30
29
  }
31
30
  }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 29266,
4
+ "number_of_characters": 13467784,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 11954895,
7
+ "min_text_length": 9,
8
+ "average_text_length": 816.9818219093829,
9
+ "max_text_length": 10005,
10
+ "unique_texts": 14527
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 1512889,
15
+ "min_text_length": 42,
16
+ "average_text_length": 103.38884712635823,
17
+ "max_text_length": 281,
18
+ "unique_texts": 14600
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 14633,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.0,
25
+ "max_relevant_docs_per_query": 1,
26
+ "unique_relevant_docs": 14633
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -1,31 +1,30 @@
1
1
  {
2
2
  "train": {
3
3
  "num_samples": 8897,
4
- "number_of_characters": 1108138,
5
- "num_documents": 2524,
6
- "min_document_length": 28,
7
- "average_document_length": 312.1117274167987,
8
- "max_document_length": 1748,
9
- "unique_documents": 2524,
10
- "num_queries": 6373,
11
- "min_query_length": 11,
12
- "average_query_length": 50.26957476855484,
13
- "max_query_length": 188,
14
- "unique_queries": 6373,
15
- "none_queries": 0,
16
- "num_relevant_docs": 6382,
17
- "min_relevant_docs_per_query": 1,
18
- "average_relevant_docs_per_query": 0.48721167425074535,
19
- "max_relevant_docs_per_query": 3,
20
- "unique_relevant_docs": 2524,
21
- "num_instructions": null,
22
- "min_instruction_length": null,
23
- "average_instruction_length": null,
24
- "max_instruction_length": null,
25
- "unique_instructions": null,
26
- "num_top_ranked": null,
27
- "min_top_ranked_per_query": null,
28
- "average_top_ranked_per_query": null,
29
- "max_top_ranked_per_query": null
4
+ "number_of_characters": 1108127,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 787759,
7
+ "min_text_length": 28,
8
+ "average_text_length": 312.10736925515056,
9
+ "max_text_length": 1748,
10
+ "unique_texts": 2522
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 320368,
15
+ "min_text_length": 11,
16
+ "average_text_length": 50.26957476855484,
17
+ "max_text_length": 188,
18
+ "unique_texts": 6373
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 3105,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 0.48721167425074535,
25
+ "max_relevant_docs_per_query": 3,
26
+ "unique_relevant_docs": 2524
27
+ },
28
+ "top_ranked_statistics": null
30
29
  }
31
30
  }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 256524,
4
+ "number_of_characters": 415016602,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 414968472,
7
+ "min_text_length": 0,
8
+ "average_text_length": 1623.9902005291087,
9
+ "max_text_length": 29368,
10
+ "unique_texts": 255216
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 48130,
15
+ "min_text_length": 11,
16
+ "average_text_length": 48.13,
17
+ "max_text_length": 110,
18
+ "unique_texts": 1000
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 1000,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.0,
25
+ "max_relevant_docs_per_query": 1,
26
+ "unique_relevant_docs": 1000
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 547,
4
+ "number_of_characters": 512504,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 494755,
7
+ "min_text_length": 251,
8
+ "average_text_length": 1779.6942446043165,
9
+ "max_text_length": 6830,
10
+ "unique_texts": 278
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 17749,
15
+ "min_text_length": 24,
16
+ "average_text_length": 65.9814126394052,
17
+ "max_text_length": 178,
18
+ "unique_texts": 269
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 278,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.033457249070632,
25
+ "max_relevant_docs_per_query": 8,
26
+ "unique_relevant_docs": 278
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 694,
4
+ "number_of_characters": 1042838,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 1009106,
7
+ "min_text_length": 330,
8
+ "average_text_length": 3881.1769230769232,
9
+ "max_text_length": 8508,
10
+ "unique_texts": 260
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 33732,
15
+ "min_text_length": 27,
16
+ "average_text_length": 77.72350230414746,
17
+ "max_text_length": 229,
18
+ "unique_texts": 434
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 434,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.0,
25
+ "max_relevant_docs_per_query": 1,
26
+ "unique_relevant_docs": 260
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 3972,
4
+ "number_of_characters": 2868994,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 2745174,
7
+ "min_text_length": 38,
8
+ "average_text_length": 1382.2628398791542,
9
+ "max_text_length": 9305,
10
+ "unique_texts": 1986
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 123820,
15
+ "min_text_length": 15,
16
+ "average_text_length": 62.34642497482377,
17
+ "max_text_length": 147,
18
+ "unique_texts": 1986
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 1986,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.0,
25
+ "max_relevant_docs_per_query": 1,
26
+ "unique_relevant_docs": 1986
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 86064,
4
+ "number_of_characters": 27281829,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 24664041,
7
+ "min_text_length": 1,
8
+ "average_text_length": 573.1558142777468,
9
+ "max_text_length": 16984,
10
+ "unique_texts": 42605
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 2617788,
15
+ "min_text_length": 9,
16
+ "average_text_length": 60.8335192414947,
17
+ "max_text_length": 489,
18
+ "unique_texts": 32605
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 43032,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.0,
25
+ "max_relevant_docs_per_query": 1,
26
+ "unique_relevant_docs": 43032
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 2120,
4
+ "number_of_characters": 3979257,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 3863515,
7
+ "min_text_length": 2876,
8
+ "average_text_length": 26282.414965986394,
9
+ "max_text_length": 70846,
10
+ "unique_texts": 147
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 115742,
15
+ "min_text_length": 11,
16
+ "average_text_length": 58.66294982260517,
17
+ "max_text_length": 194,
18
+ "unique_texts": 1973
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 1983,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.005068423720223,
25
+ "max_relevant_docs_per_query": 3,
26
+ "unique_relevant_docs": 147
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 7701,
4
+ "number_of_characters": 78399574,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 78029218,
7
+ "min_text_length": 76,
8
+ "average_text_length": 21765.472245467223,
9
+ "max_text_length": 40884,
10
+ "unique_texts": 3585
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 370356,
15
+ "min_text_length": 24,
16
+ "average_text_length": 89.9795918367347,
17
+ "max_text_length": 446,
18
+ "unique_texts": 4116
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 4117,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.000242954324587,
25
+ "max_relevant_docs_per_query": 2,
26
+ "unique_relevant_docs": 3585
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 4266,
4
+ "number_of_characters": 15390566,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 12950811,
7
+ "min_text_length": 825,
8
+ "average_text_length": 6071.641350210971,
9
+ "max_text_length": 41072,
10
+ "unique_texts": 2133
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 2439755,
15
+ "min_text_length": 18,
16
+ "average_text_length": 1143.8138771683075,
17
+ "max_text_length": 22983,
18
+ "unique_texts": 2133
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 2133,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.0,
25
+ "max_relevant_docs_per_query": 1,
26
+ "unique_relevant_docs": 2133
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 1978,
4
+ "number_of_characters": 613245,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 421921,
7
+ "min_text_length": 222,
8
+ "average_text_length": 845.5330661322645,
9
+ "max_text_length": 4667,
10
+ "unique_texts": 499
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 191324,
15
+ "min_text_length": 32,
16
+ "average_text_length": 129.3603786342123,
17
+ "max_text_length": 515,
18
+ "unique_texts": 1479
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 1482,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.002028397565923,
25
+ "max_relevant_docs_per_query": 2,
26
+ "unique_relevant_docs": 499
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 930,
4
+ "number_of_characters": 574933,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 355258,
7
+ "min_text_length": 357,
8
+ "average_text_length": 763.9956989247312,
9
+ "max_text_length": 2450,
10
+ "unique_texts": 465
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 219675,
15
+ "min_text_length": 155,
16
+ "average_text_length": 472.4193548387097,
17
+ "max_text_length": 2000,
18
+ "unique_texts": 465
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 465,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.0,
25
+ "max_relevant_docs_per_query": 1,
26
+ "unique_relevant_docs": 465
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 8968,
4
+ "number_of_characters": 2159796,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 1160348,
7
+ "min_text_length": 99,
8
+ "average_text_length": 240.68616469612112,
9
+ "max_text_length": 490,
10
+ "unique_texts": 4821
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 999448,
15
+ "min_text_length": 107,
16
+ "average_text_length": 241.0050639016156,
17
+ "max_text_length": 489,
18
+ "unique_texts": 4147
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 4976,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.199903544731131,
25
+ "max_relevant_docs_per_query": 6,
26
+ "unique_relevant_docs": 4821
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 4485,
4
+ "number_of_characters": 176136,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 92062,
7
+ "min_text_length": 10,
8
+ "average_text_length": 40.64547461368653,
9
+ "max_text_length": 232,
10
+ "unique_texts": 2265
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 84074,
15
+ "min_text_length": 10,
16
+ "average_text_length": 37.87117117117117,
17
+ "max_text_length": 188,
18
+ "unique_texts": 2220
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 2302,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.036936936936937,
25
+ "max_relevant_docs_per_query": 4,
26
+ "unique_relevant_docs": 2265
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 14541,
4
+ "number_of_characters": 94819741,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 94589279,
7
+ "min_text_length": 20,
8
+ "average_text_length": 12980.551530122135,
9
+ "max_text_length": 2772073,
10
+ "unique_texts": 7287
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 230462,
15
+ "min_text_length": 10,
16
+ "average_text_length": 31.770333609043288,
17
+ "max_text_length": 154,
18
+ "unique_texts": 7254
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 7287,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.0045492142266337,
25
+ "max_relevant_docs_per_query": 4,
26
+ "unique_relevant_docs": 7287
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }
@@ -0,0 +1,30 @@
1
+ {
2
+ "test": {
3
+ "num_samples": 30905,
4
+ "number_of_characters": 20629665,
5
+ "documents_text_statistics": {
6
+ "total_text_length": 20619027,
7
+ "min_text_length": 41,
8
+ "average_text_length": 669.3837288575788,
9
+ "max_text_length": 1716,
10
+ "unique_texts": 30172
11
+ },
12
+ "documents_image_statistics": null,
13
+ "queries_text_statistics": {
14
+ "total_text_length": 10638,
15
+ "min_text_length": 27,
16
+ "average_text_length": 104.29411764705883,
17
+ "max_text_length": 369,
18
+ "unique_texts": 102
19
+ },
20
+ "queries_image_statistics": null,
21
+ "relevant_docs_statistics": {
22
+ "num_relevant_docs": 157,
23
+ "min_relevant_docs_per_query": 1,
24
+ "average_relevant_docs_per_query": 1.5392156862745099,
25
+ "max_relevant_docs_per_query": 8,
26
+ "unique_relevant_docs": 148
27
+ },
28
+ "top_ranked_statistics": null
29
+ }
30
+ }