mteb 2.0.4__py3-none-any.whl → 2.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. mteb/descriptive_stats/BitextMining/BUCC.json +70 -40
  2. mteb/descriptive_stats/Classification/DKHateClassification.json +40 -24
  3. mteb/descriptive_stats/Classification/FinancialPhrasebankClassification.json +23 -15
  4. mteb/descriptive_stats/Classification/ImdbClassification.json +40 -24
  5. mteb/descriptive_stats/Classification/KorHateClassification.json +23 -15
  6. mteb/descriptive_stats/Clustering/ArxivClusteringP2P.json +555 -550
  7. mteb/descriptive_stats/Clustering/ArxivClusteringP2P.v2.json +546 -541
  8. mteb/descriptive_stats/Clustering/ArxivClusteringS2S.json +555 -550
  9. mteb/descriptive_stats/Clustering/MLSUMClusteringP2P.json +2466 -2416
  10. mteb/descriptive_stats/Clustering/RedditClusteringP2P.json +1365 -1360
  11. mteb/descriptive_stats/Clustering/SNLClustering.json +378 -373
  12. mteb/descriptive_stats/Clustering/SwednClustering.json +28 -23
  13. mteb/descriptive_stats/Clustering/VGClustering.json +54 -49
  14. mteb/descriptive_stats/Image/Any2AnyMultilingualRetrieval/WITT2IRetrieval.json +324 -204
  15. mteb/descriptive_stats/Image/Any2AnyRetrieval/MemotionI2TRetrieval.json +28 -18
  16. mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRAirbnbSyntheticRetrieval.json +334 -0
  17. mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRGitHubReadmeRetrieval.json +544 -0
  18. mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRTweetStockSyntheticsRetrieval.json +334 -0
  19. mteb/descriptive_stats/Image/DocumentUnderstanding/JinaVDRWikimediaCommonsDocumentsRetrieval.json +634 -0
  20. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore2ESGReportsRetrieval.json +154 -0
  21. mteb/descriptive_stats/Image/DocumentUnderstanding/Vidore2EconomicsReportsRetrieval.json +154 -0
  22. mteb/descriptive_stats/Image/ImageClassification/Imagenet1k.json +6039 -3007
  23. mteb/descriptive_stats/Image/ZeroShotClassification/Imagenet1kZeroShot.json +3024 -3010
  24. mteb/descriptive_stats/Image/ZeroShotClassification/PatchCamelyonZeroShot.json +30 -16
  25. mteb/descriptive_stats/Reranking/MIRACLReranking.json +555 -479
  26. mteb/descriptive_stats/Reranking/MindSmallReranking.json +29 -25
  27. mteb/descriptive_stats/Retrieval/AlloprofRetrieval.json +25 -26
  28. mteb/descriptive_stats/Retrieval/Code1Retrieval.json +30 -0
  29. mteb/descriptive_stats/Retrieval/DanFEVER.json +25 -26
  30. mteb/descriptive_stats/Retrieval/EnglishFinance1Retrieval.json +30 -0
  31. mteb/descriptive_stats/Retrieval/EnglishFinance2Retrieval.json +30 -0
  32. mteb/descriptive_stats/Retrieval/EnglishFinance3Retrieval.json +30 -0
  33. mteb/descriptive_stats/Retrieval/EnglishFinance4Retrieval.json +30 -0
  34. mteb/descriptive_stats/Retrieval/EnglishHealthcare1Retrieval.json +30 -0
  35. mteb/descriptive_stats/Retrieval/French1Retrieval.json +30 -0
  36. mteb/descriptive_stats/Retrieval/FrenchLegal1Retrieval.json +30 -0
  37. mteb/descriptive_stats/Retrieval/German1Retrieval.json +30 -0
  38. mteb/descriptive_stats/Retrieval/GermanHealthcare1Retrieval.json +30 -0
  39. mteb/descriptive_stats/Retrieval/GermanLegal1Retrieval.json +30 -0
  40. mteb/descriptive_stats/Retrieval/JapaneseCode1Retrieval.json +30 -0
  41. mteb/descriptive_stats/Retrieval/JapaneseLegal1Retrieval.json +30 -0
  42. mteb/descriptive_stats/Retrieval/MIRACLRetrieval.json +475 -494
  43. mteb/descriptive_stats/Retrieval/MSMARCO-Fa.json +25 -26
  44. mteb/descriptive_stats/Retrieval/MSMARCO.json +25 -84
  45. mteb/descriptive_stats/Retrieval/Touche2020.json +25 -26
  46. mteb/descriptive_stats/Summarization/SummEval.json +27 -50
  47. mteb/descriptive_stats/Summarization/SummEvalFr.json +27 -50
  48. mteb/models/model_implementations/kalm_models.py +29 -0
  49. mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
  50. mteb/tasks/classification/eng/financial_phrasebank_classification.py +0 -3
  51. mteb/tasks/classification/kor/kor_hate_classification.py +0 -12
  52. mteb/tasks/clustering/swe/swedn_clustering.py +2 -2
  53. mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
  54. {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/METADATA +1 -1
  55. {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/RECORD +59 -40
  56. {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/WHEEL +0 -0
  57. {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/entry_points.txt +0 -0
  58. {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/licenses/LICENSE +0 -0
  59. {mteb-2.0.4.dist-info → mteb-2.0.5.dist-info}/top_level.txt +0 -0
@@ -3,66 +3,96 @@
3
3
  "num_samples": 35000,
4
4
  "number_of_characters": 146737556,
5
5
  "unique_pairs": 35000,
6
- "min_sentence1_length": 16,
7
- "average_sentence1_length": 99.10931428571429,
8
- "max_sentence1_length": 204,
9
- "unique_sentence1": 34978,
10
- "min_sentence2_length": 17,
11
- "average_sentence2_length": 101.14933691422246,
12
- "max_sentence2_length": 339,
13
- "unique_sentence2": 1133728,
6
+ "sentence1_statistics": {
7
+ "total_text_length": 3468826,
8
+ "min_text_length": 16,
9
+ "average_text_length": 99.10931428571429,
10
+ "max_text_length": 204,
11
+ "unique_texts": 34978
12
+ },
13
+ "sentence2_statistics": {
14
+ "total_text_length": 143268730,
15
+ "min_text_length": 17,
16
+ "average_text_length": 101.14933691422246,
17
+ "max_text_length": 339,
18
+ "unique_texts": 1133728
19
+ },
14
20
  "hf_subset_descriptive_stats": {
15
21
  "de-en": {
16
22
  "num_samples": 9580,
17
23
  "number_of_characters": 41450074,
18
24
  "unique_pairs": 9580,
19
- "min_sentence1_length": 50,
20
- "average_sentence1_length": 109.07974947807934,
21
- "max_sentence1_length": 204,
22
- "unique_sentence1": 9573,
23
- "min_sentence2_length": 17,
24
- "average_sentence2_length": 101.18043156531952,
25
- "max_sentence2_length": 293,
26
- "unique_sentence2": 397151
25
+ "sentence1_statistics": {
26
+ "total_text_length": 1044984,
27
+ "min_text_length": 50,
28
+ "average_text_length": 109.07974947807934,
29
+ "max_text_length": 204,
30
+ "unique_texts": 9573
31
+ },
32
+ "sentence2_statistics": {
33
+ "total_text_length": 40405090,
34
+ "min_text_length": 17,
35
+ "average_text_length": 101.18043156531952,
36
+ "max_text_length": 293,
37
+ "unique_texts": 397151
38
+ }
27
39
  },
28
40
  "fr-en": {
29
41
  "num_samples": 9086,
30
42
  "number_of_characters": 38272453,
31
43
  "unique_pairs": 9086,
32
- "min_sentence1_length": 43,
33
- "average_sentence1_length": 99.31785163988553,
34
- "max_sentence1_length": 174,
35
- "unique_sentence1": 9081,
36
- "min_sentence2_length": 21,
37
- "average_sentence2_length": 101.05202942051324,
38
- "max_sentence2_length": 319,
39
- "unique_sentence2": 368033
44
+ "sentence1_statistics": {
45
+ "total_text_length": 902402,
46
+ "min_text_length": 43,
47
+ "average_text_length": 99.31785163988553,
48
+ "max_text_length": 174,
49
+ "unique_texts": 9081
50
+ },
51
+ "sentence2_statistics": {
52
+ "total_text_length": 37370051,
53
+ "min_text_length": 21,
54
+ "average_text_length": 101.05202942051324,
55
+ "max_text_length": 319,
56
+ "unique_texts": 368033
57
+ }
40
58
  },
41
59
  "ru-en": {
42
60
  "num_samples": 14435,
43
61
  "number_of_characters": 57904085,
44
62
  "unique_pairs": 14435,
45
- "min_sentence1_length": 40,
46
- "average_sentence1_length": 101.6593003117423,
47
- "max_sentence1_length": 186,
48
- "unique_sentence1": 14425,
49
- "min_sentence2_length": 21,
50
- "average_sentence2_length": 101.06828784332406,
51
- "max_sentence2_length": 339,
52
- "unique_sentence2": 555503
63
+ "sentence1_statistics": {
64
+ "total_text_length": 1467452,
65
+ "min_text_length": 40,
66
+ "average_text_length": 101.6593003117423,
67
+ "max_text_length": 186,
68
+ "unique_texts": 14425
69
+ },
70
+ "sentence2_statistics": {
71
+ "total_text_length": 56436633,
72
+ "min_text_length": 21,
73
+ "average_text_length": 101.06828784332406,
74
+ "max_text_length": 339,
75
+ "unique_texts": 555503
76
+ }
53
77
  },
54
78
  "zh-en": {
55
79
  "num_samples": 1899,
56
80
  "number_of_characters": 9110944,
57
81
  "unique_pairs": 1899,
58
- "min_sentence1_length": 16,
59
- "average_sentence1_length": 28.429699842022117,
60
- "max_sentence1_length": 40,
61
- "unique_sentence1": 1899,
62
- "min_sentence2_length": 22,
63
- "average_sentence2_length": 101.92388026108485,
64
- "max_sentence2_length": 249,
65
- "unique_sentence2": 88360
82
+ "sentence1_statistics": {
83
+ "total_text_length": 53988,
84
+ "min_text_length": 16,
85
+ "average_text_length": 28.429699842022117,
86
+ "max_text_length": 40,
87
+ "unique_texts": 1899
88
+ },
89
+ "sentence2_statistics": {
90
+ "total_text_length": 9056956,
91
+ "min_text_length": 22,
92
+ "average_text_length": 101.92388026108485,
93
+ "max_text_length": 249,
94
+ "unique_texts": 88360
95
+ }
66
96
  }
67
97
  }
68
98
  }
@@ -1,37 +1,53 @@
1
1
  {
2
2
  "test": {
3
3
  "num_samples": 329,
4
- "number_of_characters": 29011,
5
4
  "number_texts_intersect_with_train": 4,
6
- "min_text_length": 1,
7
- "average_text_length": 88.17933130699087,
8
- "max_text_length": 2434,
9
- "unique_text": 326,
10
- "unique_labels": 2,
11
- "labels": {
12
- "0": {
13
- "count": 288
14
- },
15
- "1": {
16
- "count": 41
5
+ "text_statistics": {
6
+ "total_text_length": 29011,
7
+ "min_text_length": 1,
8
+ "average_text_length": 88.17933130699087,
9
+ "max_text_length": 2434,
10
+ "unique_texts": 326
11
+ },
12
+ "image_statistics": null,
13
+ "label_statistics": {
14
+ "min_labels_per_text": 1,
15
+ "average_label_per_text": 1.0,
16
+ "max_labels_per_text": 1,
17
+ "unique_labels": 2,
18
+ "labels": {
19
+ "0": {
20
+ "count": 288
21
+ },
22
+ "1": {
23
+ "count": 41
24
+ }
17
25
  }
18
26
  }
19
27
  },
20
28
  "train": {
21
29
  "num_samples": 2960,
22
- "number_of_characters": 307722,
23
30
  "number_texts_intersect_with_train": null,
24
- "min_text_length": 1,
25
- "average_text_length": 103.96013513513513,
26
- "max_text_length": 5403,
27
- "unique_text": 2902,
28
- "unique_labels": 2,
29
- "labels": {
30
- "0": {
31
- "count": 2576
32
- },
33
- "1": {
34
- "count": 384
31
+ "text_statistics": {
32
+ "total_text_length": 307722,
33
+ "min_text_length": 1,
34
+ "average_text_length": 103.96013513513513,
35
+ "max_text_length": 5403,
36
+ "unique_texts": 2902
37
+ },
38
+ "image_statistics": null,
39
+ "label_statistics": {
40
+ "min_labels_per_text": 1,
41
+ "average_label_per_text": 1.0,
42
+ "max_labels_per_text": 1,
43
+ "unique_labels": 2,
44
+ "labels": {
45
+ "0": {
46
+ "count": 2576
47
+ },
48
+ "1": {
49
+ "count": 384
50
+ }
35
51
  }
36
52
  }
37
53
  }
@@ -1,22 +1,30 @@
1
1
  {
2
2
  "train": {
3
3
  "num_samples": 2264,
4
- "number_of_characters": 276123,
5
4
  "number_texts_intersect_with_train": null,
6
- "min_text_length": 9,
7
- "average_text_length": 121.96245583038869,
8
- "max_text_length": 315,
9
- "unique_text": 2259,
10
- "unique_labels": 3,
11
- "labels": {
12
- "1": {
13
- "count": 1391
14
- },
15
- "2": {
16
- "count": 570
17
- },
18
- "0": {
19
- "count": 303
5
+ "text_statistics": {
6
+ "total_text_length": 276123,
7
+ "min_text_length": 9,
8
+ "average_text_length": 121.96245583038869,
9
+ "max_text_length": 315,
10
+ "unique_texts": 2259
11
+ },
12
+ "image_statistics": null,
13
+ "label_statistics": {
14
+ "min_labels_per_text": 1,
15
+ "average_label_per_text": 1.0,
16
+ "max_labels_per_text": 1,
17
+ "unique_labels": 3,
18
+ "labels": {
19
+ "1": {
20
+ "count": 1391
21
+ },
22
+ "2": {
23
+ "count": 570
24
+ },
25
+ "0": {
26
+ "count": 303
27
+ }
20
28
  }
21
29
  }
22
30
  }
@@ -1,37 +1,53 @@
1
1
  {
2
2
  "test": {
3
3
  "num_samples": 25000,
4
- "number_of_characters": 32344810,
5
4
  "number_texts_intersect_with_train": 123,
6
- "min_text_length": 32,
7
- "average_text_length": 1293.7924,
8
- "max_text_length": 12988,
9
- "unique_text": 24801,
10
- "unique_labels": 2,
11
- "labels": {
12
- "0": {
13
- "count": 12500
14
- },
15
- "1": {
16
- "count": 12500
5
+ "text_statistics": {
6
+ "total_text_length": 32344810,
7
+ "min_text_length": 32,
8
+ "average_text_length": 1293.7924,
9
+ "max_text_length": 12988,
10
+ "unique_texts": 24801
11
+ },
12
+ "image_statistics": null,
13
+ "label_statistics": {
14
+ "min_labels_per_text": 1,
15
+ "average_label_per_text": 1.0,
16
+ "max_labels_per_text": 1,
17
+ "unique_labels": 2,
18
+ "labels": {
19
+ "0": {
20
+ "count": 12500
21
+ },
22
+ "1": {
23
+ "count": 12500
24
+ }
17
25
  }
18
26
  }
19
27
  },
20
28
  "train": {
21
29
  "num_samples": 25000,
22
- "number_of_characters": 33126741,
23
30
  "number_texts_intersect_with_train": null,
24
- "min_text_length": 52,
25
- "average_text_length": 1325.06964,
26
- "max_text_length": 13704,
27
- "unique_text": 24904,
28
- "unique_labels": 2,
29
- "labels": {
30
- "0": {
31
- "count": 12500
32
- },
33
- "1": {
34
- "count": 12500
31
+ "text_statistics": {
32
+ "total_text_length": 33126741,
33
+ "min_text_length": 52,
34
+ "average_text_length": 1325.06964,
35
+ "max_text_length": 13704,
36
+ "unique_texts": 24904
37
+ },
38
+ "image_statistics": null,
39
+ "label_statistics": {
40
+ "min_labels_per_text": 1,
41
+ "average_label_per_text": 1.0,
42
+ "max_labels_per_text": 1,
43
+ "unique_labels": 2,
44
+ "labels": {
45
+ "0": {
46
+ "count": 12500
47
+ },
48
+ "1": {
49
+ "count": 12500
50
+ }
35
51
  }
36
52
  }
37
53
  }
@@ -1,22 +1,30 @@
1
1
  {
2
2
  "train": {
3
3
  "num_samples": 2048,
4
- "number_of_characters": 79006,
5
4
  "number_texts_intersect_with_train": null,
6
- "min_text_length": 4,
7
- "average_text_length": 38.5771484375,
8
- "max_text_length": 130,
9
- "unique_text": 2048,
10
- "unique_labels": 3,
11
- "labels": {
12
- "1": {
13
- "count": 648
14
- },
15
- "2": {
16
- "count": 904
17
- },
18
- "0": {
19
- "count": 496
5
+ "text_statistics": {
6
+ "total_text_length": 79006,
7
+ "min_text_length": 4,
8
+ "average_text_length": 38.5771484375,
9
+ "max_text_length": 130,
10
+ "unique_texts": 2048
11
+ },
12
+ "image_statistics": null,
13
+ "label_statistics": {
14
+ "min_labels_per_text": 1,
15
+ "average_label_per_text": 1.0,
16
+ "max_labels_per_text": 1,
17
+ "unique_labels": 3,
18
+ "labels": {
19
+ "1": {
20
+ "count": 648
21
+ },
22
+ "2": {
23
+ "count": 904
24
+ },
25
+ "0": {
26
+ "count": 496
27
+ }
20
28
  }
21
29
  }
22
30
  }