mteb 2.0.5__py3-none-any.whl → 2.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/__init__.py +10 -1
- mteb/benchmarks/benchmarks/__init__.py +2 -0
- mteb/benchmarks/benchmarks/benchmarks.py +75 -0
- mteb/descriptive_stats/Classification/DutchColaClassification.json +54 -0
- mteb/descriptive_stats/Classification/DutchGovernmentBiasClassification.json +54 -0
- mteb/descriptive_stats/Classification/DutchNewsArticlesClassification.json +90 -0
- mteb/descriptive_stats/Classification/DutchSarcasticHeadlinesClassification.json +54 -0
- mteb/descriptive_stats/Classification/IconclassClassification.json +96 -0
- mteb/descriptive_stats/Classification/OpenTenderClassification.json +222 -0
- mteb/descriptive_stats/Classification/VaccinChatNLClassification.json +1068 -0
- mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringP2P.json +45 -0
- mteb/descriptive_stats/Clustering/DutchNewsArticlesClusteringS2S.json +45 -0
- mteb/descriptive_stats/Clustering/IconclassClusteringS2S.json +48 -0
- mteb/descriptive_stats/Clustering/OpenTenderClusteringP2P.json +111 -0
- mteb/descriptive_stats/Clustering/OpenTenderClusteringS2S.json +111 -0
- mteb/descriptive_stats/Clustering/VABBClusteringP2P.json +60 -0
- mteb/descriptive_stats/Clustering/VABBClusteringS2S.json +60 -0
- mteb/descriptive_stats/MultilabelClassification/CovidDisinformationNLMultiLabelClassification.json +84 -0
- mteb/descriptive_stats/MultilabelClassification/VABBMultiLabelClassification.json +156 -0
- mteb/descriptive_stats/PairClassification/SICKNLPairClassification.json +35 -0
- mteb/descriptive_stats/PairClassification/XLWICNLPairClassification.json +35 -0
- mteb/descriptive_stats/Retrieval/DutchNewsArticlesRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/LegalQANLRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/OpenTenderRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/VABBRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/bBSARDNLRetrieval.json +30 -0
- mteb/descriptive_stats/STS/SICK-NL-STS.json +28 -0
- mteb/tasks/classification/nld/__init__.py +16 -0
- mteb/tasks/classification/nld/dutch_cola_classification.py +38 -0
- mteb/tasks/classification/nld/dutch_government_bias_classification.py +37 -0
- mteb/tasks/classification/nld/dutch_news_articles_classification.py +30 -0
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +36 -0
- mteb/tasks/classification/nld/iconclass_classification.py +41 -0
- mteb/tasks/classification/nld/open_tender_classification.py +38 -0
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +46 -0
- mteb/tasks/clustering/__init__.py +1 -0
- mteb/tasks/clustering/nld/__init__.py +17 -0
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +37 -0
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +37 -0
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +47 -0
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +51 -0
- mteb/tasks/clustering/nld/open_tender_clustering_s2s.py +41 -0
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +51 -0
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +51 -0
- mteb/tasks/multilabel_classification/__init__.py +1 -0
- mteb/tasks/multilabel_classification/nld/__init__.py +9 -0
- mteb/tasks/multilabel_classification/nld/covid_disinformation_nl_multi_label_classification.py +88 -0
- mteb/tasks/multilabel_classification/nld/vabb_multi_label_classification.py +44 -0
- mteb/tasks/pair_classification/__init__.py +1 -0
- mteb/tasks/pair_classification/nld/__init__.py +7 -0
- mteb/tasks/pair_classification/nld/sick_nl_pair_classification.py +36 -0
- mteb/tasks/pair_classification/nld/xlwic_nl_pair_classification.py +41 -0
- mteb/tasks/retrieval/nld/__init__.py +10 -0
- mteb/tasks/retrieval/nld/bbsard_nl_retrieval.py +41 -0
- mteb/tasks/retrieval/nld/dutch_news_articles_retrieval.py +30 -0
- mteb/tasks/retrieval/nld/legal_qa_nl_retrieval.py +39 -0
- mteb/tasks/retrieval/nld/open_tender_retrieval.py +38 -0
- mteb/tasks/retrieval/nld/vabb_retrieval.py +41 -0
- mteb/tasks/sts/__init__.py +1 -0
- mteb/tasks/sts/nld/__init__.py +5 -0
- mteb/tasks/sts/nld/sick_nl_sts.py +41 -0
- {mteb-2.0.5.dist-info → mteb-2.1.0.dist-info}/METADATA +2 -204
- {mteb-2.0.5.dist-info → mteb-2.1.0.dist-info}/RECORD +67 -15
- {mteb-2.0.5.dist-info → mteb-2.1.0.dist-info}/WHEEL +0 -0
- {mteb-2.0.5.dist-info → mteb-2.1.0.dist-info}/entry_points.txt +0 -0
- {mteb-2.0.5.dist-info → mteb-2.1.0.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.0.5.dist-info → mteb-2.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 1200,
|
|
4
|
+
"text_statistics": {
|
|
5
|
+
"total_text_length": 2034506,
|
|
6
|
+
"min_text_length": 184,
|
|
7
|
+
"average_text_length": 1695.4216666666666,
|
|
8
|
+
"max_text_length": 8825,
|
|
9
|
+
"unique_texts": 1200
|
|
10
|
+
},
|
|
11
|
+
"image_statistics": null,
|
|
12
|
+
"labels_statistics": {
|
|
13
|
+
"min_labels_per_text": 1,
|
|
14
|
+
"average_label_per_text": 1.0,
|
|
15
|
+
"max_labels_per_text": 1,
|
|
16
|
+
"unique_labels": 8,
|
|
17
|
+
"labels": {
|
|
18
|
+
"Opmerkelijk": {
|
|
19
|
+
"count": 150
|
|
20
|
+
},
|
|
21
|
+
"Buitenland": {
|
|
22
|
+
"count": 150
|
|
23
|
+
},
|
|
24
|
+
"Cultuur & Media": {
|
|
25
|
+
"count": 150
|
|
26
|
+
},
|
|
27
|
+
"Binnenland": {
|
|
28
|
+
"count": 150
|
|
29
|
+
},
|
|
30
|
+
"Politiek": {
|
|
31
|
+
"count": 150
|
|
32
|
+
},
|
|
33
|
+
"Economie": {
|
|
34
|
+
"count": 150
|
|
35
|
+
},
|
|
36
|
+
"Tech": {
|
|
37
|
+
"count": 150
|
|
38
|
+
},
|
|
39
|
+
"Regionaal nieuws": {
|
|
40
|
+
"count": 150
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 1200,
|
|
4
|
+
"text_statistics": {
|
|
5
|
+
"total_text_length": 59242,
|
|
6
|
+
"min_text_length": 18,
|
|
7
|
+
"average_text_length": 49.36833333333333,
|
|
8
|
+
"max_text_length": 94,
|
|
9
|
+
"unique_texts": 1200
|
|
10
|
+
},
|
|
11
|
+
"image_statistics": null,
|
|
12
|
+
"labels_statistics": {
|
|
13
|
+
"min_labels_per_text": 1,
|
|
14
|
+
"average_label_per_text": 1.0,
|
|
15
|
+
"max_labels_per_text": 1,
|
|
16
|
+
"unique_labels": 8,
|
|
17
|
+
"labels": {
|
|
18
|
+
"Opmerkelijk": {
|
|
19
|
+
"count": 150
|
|
20
|
+
},
|
|
21
|
+
"Buitenland": {
|
|
22
|
+
"count": 150
|
|
23
|
+
},
|
|
24
|
+
"Cultuur & Media": {
|
|
25
|
+
"count": 150
|
|
26
|
+
},
|
|
27
|
+
"Binnenland": {
|
|
28
|
+
"count": 150
|
|
29
|
+
},
|
|
30
|
+
"Politiek": {
|
|
31
|
+
"count": 150
|
|
32
|
+
},
|
|
33
|
+
"Economie": {
|
|
34
|
+
"count": 150
|
|
35
|
+
},
|
|
36
|
+
"Tech": {
|
|
37
|
+
"count": 150
|
|
38
|
+
},
|
|
39
|
+
"Regionaal nieuws": {
|
|
40
|
+
"count": 150
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 202,
|
|
4
|
+
"text_statistics": {
|
|
5
|
+
"total_text_length": 11827,
|
|
6
|
+
"min_text_length": 6,
|
|
7
|
+
"average_text_length": 58.54950495049505,
|
|
8
|
+
"max_text_length": 403,
|
|
9
|
+
"unique_texts": 202
|
|
10
|
+
},
|
|
11
|
+
"image_statistics": null,
|
|
12
|
+
"labels_statistics": {
|
|
13
|
+
"min_labels_per_text": 1,
|
|
14
|
+
"average_label_per_text": 1.0,
|
|
15
|
+
"max_labels_per_text": 1,
|
|
16
|
+
"unique_labels": 9,
|
|
17
|
+
"labels": {
|
|
18
|
+
"Geschiedenis": {
|
|
19
|
+
"count": 22
|
|
20
|
+
},
|
|
21
|
+
"Klassieke mythologie en Oude Geschiedenis": {
|
|
22
|
+
"count": 22
|
|
23
|
+
},
|
|
24
|
+
"Literatuur": {
|
|
25
|
+
"count": 23
|
|
26
|
+
},
|
|
27
|
+
"Natuur": {
|
|
28
|
+
"count": 23
|
|
29
|
+
},
|
|
30
|
+
"De mens, de mensheid in het algemeen": {
|
|
31
|
+
"count": 22
|
|
32
|
+
},
|
|
33
|
+
"Maatschappij, civilisatie en cultuur": {
|
|
34
|
+
"count": 22
|
|
35
|
+
},
|
|
36
|
+
"Abstracte idee\u00ebn en concepten": {
|
|
37
|
+
"count": 23
|
|
38
|
+
},
|
|
39
|
+
"Religie en magie": {
|
|
40
|
+
"count": 22
|
|
41
|
+
},
|
|
42
|
+
"Bijbel": {
|
|
43
|
+
"count": 23
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 4485,
|
|
4
|
+
"text_statistics": {
|
|
5
|
+
"total_text_length": 2177123,
|
|
6
|
+
"min_text_length": 18,
|
|
7
|
+
"average_text_length": 485.4231884057971,
|
|
8
|
+
"max_text_length": 12193,
|
|
9
|
+
"unique_texts": 4485
|
|
10
|
+
},
|
|
11
|
+
"image_statistics": null,
|
|
12
|
+
"labels_statistics": {
|
|
13
|
+
"min_labels_per_text": 1,
|
|
14
|
+
"average_label_per_text": 1.0,
|
|
15
|
+
"max_labels_per_text": 1,
|
|
16
|
+
"unique_labels": 30,
|
|
17
|
+
"labels": {
|
|
18
|
+
"Kantoormachines en gegevensverwerkende apparatuur, kantooruitrusting en -benodigdheden, uitgez. meubilair en softwarepakketten": {
|
|
19
|
+
"count": 150
|
|
20
|
+
},
|
|
21
|
+
"Medische apparatuur, farmaceutische artikelen en artikelen voor lichaamsverzorging": {
|
|
22
|
+
"count": 150
|
|
23
|
+
},
|
|
24
|
+
"Onderzoek en ontwikkeling, en aanverwante adviezen": {
|
|
25
|
+
"count": 147
|
|
26
|
+
},
|
|
27
|
+
"Zakelijke dienstverlening: juridisch, marketing, consulting, drukkerij en beveiliging": {
|
|
28
|
+
"count": 150
|
|
29
|
+
},
|
|
30
|
+
"Uitrusting voor veiligheid, brandweer, politie en leger": {
|
|
31
|
+
"count": 150
|
|
32
|
+
},
|
|
33
|
+
"Structuren en materialen voor de bouw; ondersteunende producten voor de bouw (uitgezonderd elektrische apparatuur)": {
|
|
34
|
+
"count": 149
|
|
35
|
+
},
|
|
36
|
+
"Diensten voor land-, bos- en tuinbouw, aquicultuur en imkerij": {
|
|
37
|
+
"count": 150
|
|
38
|
+
},
|
|
39
|
+
"Reparatie- en onderhoudsdiensten": {
|
|
40
|
+
"count": 150
|
|
41
|
+
},
|
|
42
|
+
"Overige gemeenschaps-, sociale en persoonlijke diensten": {
|
|
43
|
+
"count": 150
|
|
44
|
+
},
|
|
45
|
+
"IT-diensten: adviezen, softwareontwikkeling, internet en ondersteuning": {
|
|
46
|
+
"count": 150
|
|
47
|
+
},
|
|
48
|
+
"Kleding, schoeisel, bagageartikelen en accessoires": {
|
|
49
|
+
"count": 150
|
|
50
|
+
},
|
|
51
|
+
"Meubelen (m.i.v. kantoormeubelen), inrichtingsartikelen, huishoudelijke apparaten (uitgez. verlichting) en schoonmaakproducten": {
|
|
52
|
+
"count": 149
|
|
53
|
+
},
|
|
54
|
+
"Gezondheidszorg en maatschappelijk werk": {
|
|
55
|
+
"count": 150
|
|
56
|
+
},
|
|
57
|
+
"Laboratoriuminstrumenten, optische en precisie-instrumenten (uitgezonderd brillen)": {
|
|
58
|
+
"count": 149
|
|
59
|
+
},
|
|
60
|
+
"Voeding, dranken, tabak en aanverwante producten": {
|
|
61
|
+
"count": 150
|
|
62
|
+
},
|
|
63
|
+
"Bouwwerkzaamheden": {
|
|
64
|
+
"count": 150
|
|
65
|
+
},
|
|
66
|
+
"Bedrijfsmachines": {
|
|
67
|
+
"count": 149
|
|
68
|
+
},
|
|
69
|
+
"Elektrische machines, apparaten, uitrusting en verbruiksartikelen; verlichting": {
|
|
70
|
+
"count": 149
|
|
71
|
+
},
|
|
72
|
+
"Vervoersdiensten (uitg. vervoer van afval)": {
|
|
73
|
+
"count": 149
|
|
74
|
+
},
|
|
75
|
+
"Financi\u00eble en verzekeringsdiensten": {
|
|
76
|
+
"count": 150
|
|
77
|
+
},
|
|
78
|
+
"Radio-, televisie-, communicatie-, telecommunicatietoestellen en aanverwante apparatuur": {
|
|
79
|
+
"count": 150
|
|
80
|
+
},
|
|
81
|
+
"Diensten voor onderwijs en opleiding": {
|
|
82
|
+
"count": 149
|
|
83
|
+
},
|
|
84
|
+
"Drukwerk en aanverwante producten": {
|
|
85
|
+
"count": 149
|
|
86
|
+
},
|
|
87
|
+
"Vervoersmaterieel en bijbehorende producten": {
|
|
88
|
+
"count": 149
|
|
89
|
+
},
|
|
90
|
+
"Software en informatiesystemen": {
|
|
91
|
+
"count": 150
|
|
92
|
+
},
|
|
93
|
+
"Dienstverlening op het gebied van architectuur, bouwkunde, civiele techniek en inspectie": {
|
|
94
|
+
"count": 150
|
|
95
|
+
},
|
|
96
|
+
"Diensten inzake afvalwater, afval, reiniging en milieu": {
|
|
97
|
+
"count": 150
|
|
98
|
+
},
|
|
99
|
+
"Post- en telecommunicatiediensten": {
|
|
100
|
+
"count": 149
|
|
101
|
+
},
|
|
102
|
+
"Aardolieproducten, brandstof, elektriciteit en andere energiebronnen": {
|
|
103
|
+
"count": 149
|
|
104
|
+
},
|
|
105
|
+
"Diensten voor hotel, restaurant en detailhandel": {
|
|
106
|
+
"count": 149
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 4381,
|
|
4
|
+
"text_statistics": {
|
|
5
|
+
"total_text_length": 264422,
|
|
6
|
+
"min_text_length": 6,
|
|
7
|
+
"average_text_length": 60.3565396028304,
|
|
8
|
+
"max_text_length": 420,
|
|
9
|
+
"unique_texts": 4381
|
|
10
|
+
},
|
|
11
|
+
"image_statistics": null,
|
|
12
|
+
"labels_statistics": {
|
|
13
|
+
"min_labels_per_text": 1,
|
|
14
|
+
"average_label_per_text": 1.0,
|
|
15
|
+
"max_labels_per_text": 1,
|
|
16
|
+
"unique_labels": 30,
|
|
17
|
+
"labels": {
|
|
18
|
+
"Kantoormachines en gegevensverwerkende apparatuur, kantooruitrusting en -benodigdheden, uitgez. meubilair en softwarepakketten": {
|
|
19
|
+
"count": 143
|
|
20
|
+
},
|
|
21
|
+
"Medische apparatuur, farmaceutische artikelen en artikelen voor lichaamsverzorging": {
|
|
22
|
+
"count": 148
|
|
23
|
+
},
|
|
24
|
+
"Onderzoek en ontwikkeling, en aanverwante adviezen": {
|
|
25
|
+
"count": 147
|
|
26
|
+
},
|
|
27
|
+
"Zakelijke dienstverlening: juridisch, marketing, consulting, drukkerij en beveiliging": {
|
|
28
|
+
"count": 145
|
|
29
|
+
},
|
|
30
|
+
"Uitrusting voor veiligheid, brandweer, politie en leger": {
|
|
31
|
+
"count": 149
|
|
32
|
+
},
|
|
33
|
+
"Structuren en materialen voor de bouw; ondersteunende producten voor de bouw (uitgezonderd elektrische apparatuur)": {
|
|
34
|
+
"count": 146
|
|
35
|
+
},
|
|
36
|
+
"Diensten voor land-, bos- en tuinbouw, aquicultuur en imkerij": {
|
|
37
|
+
"count": 150
|
|
38
|
+
},
|
|
39
|
+
"Reparatie- en onderhoudsdiensten": {
|
|
40
|
+
"count": 148
|
|
41
|
+
},
|
|
42
|
+
"Overige gemeenschaps-, sociale en persoonlijke diensten": {
|
|
43
|
+
"count": 143
|
|
44
|
+
},
|
|
45
|
+
"IT-diensten: adviezen, softwareontwikkeling, internet en ondersteuning": {
|
|
46
|
+
"count": 149
|
|
47
|
+
},
|
|
48
|
+
"Kleding, schoeisel, bagageartikelen en accessoires": {
|
|
49
|
+
"count": 143
|
|
50
|
+
},
|
|
51
|
+
"Meubelen (m.i.v. kantoormeubelen), inrichtingsartikelen, huishoudelijke apparaten (uitgez. verlichting) en schoonmaakproducten": {
|
|
52
|
+
"count": 143
|
|
53
|
+
},
|
|
54
|
+
"Gezondheidszorg en maatschappelijk werk": {
|
|
55
|
+
"count": 147
|
|
56
|
+
},
|
|
57
|
+
"Laboratoriuminstrumenten, optische en precisie-instrumenten (uitgezonderd brillen)": {
|
|
58
|
+
"count": 147
|
|
59
|
+
},
|
|
60
|
+
"Voeding, dranken, tabak en aanverwante producten": {
|
|
61
|
+
"count": 149
|
|
62
|
+
},
|
|
63
|
+
"Bouwwerkzaamheden": {
|
|
64
|
+
"count": 149
|
|
65
|
+
},
|
|
66
|
+
"Bedrijfsmachines": {
|
|
67
|
+
"count": 149
|
|
68
|
+
},
|
|
69
|
+
"Elektrische machines, apparaten, uitrusting en verbruiksartikelen; verlichting": {
|
|
70
|
+
"count": 149
|
|
71
|
+
},
|
|
72
|
+
"Vervoersdiensten (uitg. vervoer van afval)": {
|
|
73
|
+
"count": 144
|
|
74
|
+
},
|
|
75
|
+
"Financi\u00eble en verzekeringsdiensten": {
|
|
76
|
+
"count": 144
|
|
77
|
+
},
|
|
78
|
+
"Radio-, televisie-, communicatie-, telecommunicatietoestellen en aanverwante apparatuur": {
|
|
79
|
+
"count": 145
|
|
80
|
+
},
|
|
81
|
+
"Diensten voor onderwijs en opleiding": {
|
|
82
|
+
"count": 148
|
|
83
|
+
},
|
|
84
|
+
"Drukwerk en aanverwante producten": {
|
|
85
|
+
"count": 137
|
|
86
|
+
},
|
|
87
|
+
"Vervoersmaterieel en bijbehorende producten": {
|
|
88
|
+
"count": 148
|
|
89
|
+
},
|
|
90
|
+
"Software en informatiesystemen": {
|
|
91
|
+
"count": 150
|
|
92
|
+
},
|
|
93
|
+
"Dienstverlening op het gebied van architectuur, bouwkunde, civiele techniek en inspectie": {
|
|
94
|
+
"count": 149
|
|
95
|
+
},
|
|
96
|
+
"Diensten inzake afvalwater, afval, reiniging en milieu": {
|
|
97
|
+
"count": 147
|
|
98
|
+
},
|
|
99
|
+
"Post- en telecommunicatiediensten": {
|
|
100
|
+
"count": 142
|
|
101
|
+
},
|
|
102
|
+
"Aardolieproducten, brandstof, elektriciteit en andere energiebronnen": {
|
|
103
|
+
"count": 143
|
|
104
|
+
},
|
|
105
|
+
"Diensten voor hotel, restaurant en detailhandel": {
|
|
106
|
+
"count": 140
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 195,
|
|
4
|
+
"text_statistics": {
|
|
5
|
+
"total_text_length": 186620,
|
|
6
|
+
"min_text_length": 30,
|
|
7
|
+
"average_text_length": 957.025641025641,
|
|
8
|
+
"max_text_length": 35167,
|
|
9
|
+
"unique_texts": 195
|
|
10
|
+
},
|
|
11
|
+
"image_statistics": null,
|
|
12
|
+
"labels_statistics": {
|
|
13
|
+
"min_labels_per_text": 1,
|
|
14
|
+
"average_label_per_text": 1.0,
|
|
15
|
+
"max_labels_per_text": 1,
|
|
16
|
+
"unique_labels": 13,
|
|
17
|
+
"labels": {
|
|
18
|
+
"Art History": {
|
|
19
|
+
"count": 15
|
|
20
|
+
},
|
|
21
|
+
"Sociology": {
|
|
22
|
+
"count": 15
|
|
23
|
+
},
|
|
24
|
+
"Law": {
|
|
25
|
+
"count": 15
|
|
26
|
+
},
|
|
27
|
+
"History": {
|
|
28
|
+
"count": 15
|
|
29
|
+
},
|
|
30
|
+
"Linguistics": {
|
|
31
|
+
"count": 15
|
|
32
|
+
},
|
|
33
|
+
"Social Health Sciences": {
|
|
34
|
+
"count": 15
|
|
35
|
+
},
|
|
36
|
+
"Political Sciences": {
|
|
37
|
+
"count": 15
|
|
38
|
+
},
|
|
39
|
+
"Educational Sciences": {
|
|
40
|
+
"count": 15
|
|
41
|
+
},
|
|
42
|
+
"Literature": {
|
|
43
|
+
"count": 15
|
|
44
|
+
},
|
|
45
|
+
"Theology": {
|
|
46
|
+
"count": 15
|
|
47
|
+
},
|
|
48
|
+
"Economics & Business": {
|
|
49
|
+
"count": 15
|
|
50
|
+
},
|
|
51
|
+
"Philosophy": {
|
|
52
|
+
"count": 15
|
|
53
|
+
},
|
|
54
|
+
"Criminology": {
|
|
55
|
+
"count": 15
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 195,
|
|
4
|
+
"text_statistics": {
|
|
5
|
+
"total_text_length": 14155,
|
|
6
|
+
"min_text_length": 13,
|
|
7
|
+
"average_text_length": 72.58974358974359,
|
|
8
|
+
"max_text_length": 182,
|
|
9
|
+
"unique_texts": 195
|
|
10
|
+
},
|
|
11
|
+
"image_statistics": null,
|
|
12
|
+
"labels_statistics": {
|
|
13
|
+
"min_labels_per_text": 1,
|
|
14
|
+
"average_label_per_text": 1.0,
|
|
15
|
+
"max_labels_per_text": 1,
|
|
16
|
+
"unique_labels": 13,
|
|
17
|
+
"labels": {
|
|
18
|
+
"Art History": {
|
|
19
|
+
"count": 15
|
|
20
|
+
},
|
|
21
|
+
"Sociology": {
|
|
22
|
+
"count": 15
|
|
23
|
+
},
|
|
24
|
+
"Law": {
|
|
25
|
+
"count": 15
|
|
26
|
+
},
|
|
27
|
+
"History": {
|
|
28
|
+
"count": 15
|
|
29
|
+
},
|
|
30
|
+
"Linguistics": {
|
|
31
|
+
"count": 15
|
|
32
|
+
},
|
|
33
|
+
"Social Health Sciences": {
|
|
34
|
+
"count": 15
|
|
35
|
+
},
|
|
36
|
+
"Political Sciences": {
|
|
37
|
+
"count": 15
|
|
38
|
+
},
|
|
39
|
+
"Educational Sciences": {
|
|
40
|
+
"count": 15
|
|
41
|
+
},
|
|
42
|
+
"Literature": {
|
|
43
|
+
"count": 15
|
|
44
|
+
},
|
|
45
|
+
"Theology": {
|
|
46
|
+
"count": 15
|
|
47
|
+
},
|
|
48
|
+
"Economics & Business": {
|
|
49
|
+
"count": 15
|
|
50
|
+
},
|
|
51
|
+
"Philosophy": {
|
|
52
|
+
"count": 15
|
|
53
|
+
},
|
|
54
|
+
"Criminology": {
|
|
55
|
+
"count": 15
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
}
|
mteb/descriptive_stats/MultilabelClassification/CovidDisinformationNLMultiLabelClassification.json
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
{
|
|
2
|
+
"test": {
|
|
3
|
+
"num_samples": 252,
|
|
4
|
+
"number_texts_intersect_with_train": null,
|
|
5
|
+
"text_statistics": {
|
|
6
|
+
"total_text_length": 48963,
|
|
7
|
+
"min_text_length": 35,
|
|
8
|
+
"average_text_length": 194.29761904761904,
|
|
9
|
+
"max_text_length": 337,
|
|
10
|
+
"unique_texts": 252
|
|
11
|
+
},
|
|
12
|
+
"image_statistics": null,
|
|
13
|
+
"label_statistics": {
|
|
14
|
+
"min_labels_per_text": 0,
|
|
15
|
+
"average_label_per_text": 2.115079365079365,
|
|
16
|
+
"max_labels_per_text": 6,
|
|
17
|
+
"unique_labels": 7,
|
|
18
|
+
"labels": {
|
|
19
|
+
"1": {
|
|
20
|
+
"count": 183
|
|
21
|
+
},
|
|
22
|
+
"None": {
|
|
23
|
+
"count": 50
|
|
24
|
+
},
|
|
25
|
+
"3": {
|
|
26
|
+
"count": 102
|
|
27
|
+
},
|
|
28
|
+
"5": {
|
|
29
|
+
"count": 83
|
|
30
|
+
},
|
|
31
|
+
"0": {
|
|
32
|
+
"count": 56
|
|
33
|
+
},
|
|
34
|
+
"2": {
|
|
35
|
+
"count": 62
|
|
36
|
+
},
|
|
37
|
+
"4": {
|
|
38
|
+
"count": 47
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
},
|
|
43
|
+
"train": {
|
|
44
|
+
"num_samples": 929,
|
|
45
|
+
"number_texts_intersect_with_train": null,
|
|
46
|
+
"text_statistics": {
|
|
47
|
+
"total_text_length": 174673,
|
|
48
|
+
"min_text_length": 33,
|
|
49
|
+
"average_text_length": 188.02260495156082,
|
|
50
|
+
"max_text_length": 714,
|
|
51
|
+
"unique_texts": 929
|
|
52
|
+
},
|
|
53
|
+
"image_statistics": null,
|
|
54
|
+
"label_statistics": {
|
|
55
|
+
"min_labels_per_text": 0,
|
|
56
|
+
"average_label_per_text": 1.9730893433799785,
|
|
57
|
+
"max_labels_per_text": 6,
|
|
58
|
+
"unique_labels": 7,
|
|
59
|
+
"labels": {
|
|
60
|
+
"1": {
|
|
61
|
+
"count": 709
|
|
62
|
+
},
|
|
63
|
+
"3": {
|
|
64
|
+
"count": 377
|
|
65
|
+
},
|
|
66
|
+
"0": {
|
|
67
|
+
"count": 164
|
|
68
|
+
},
|
|
69
|
+
"2": {
|
|
70
|
+
"count": 183
|
|
71
|
+
},
|
|
72
|
+
"4": {
|
|
73
|
+
"count": 138
|
|
74
|
+
},
|
|
75
|
+
"5": {
|
|
76
|
+
"count": 262
|
|
77
|
+
},
|
|
78
|
+
"None": {
|
|
79
|
+
"count": 168
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
}
|