red-datasets 0.1.0 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,136 @@
1
+ class RdatasetsTest < Test::Unit::TestCase
2
+ sub_test_case("RdatasetsList") do
3
+ def setup
4
+ @dataset = Datasets::RdatasetsList.new
5
+ end
6
+
7
+ sub_test_case("#each") do
8
+ test("with package_name") do
9
+ records = @dataset.filter(package: "datasets").to_a
10
+ assert_equal([
11
+ 84,
12
+ {
13
+ package: "datasets",
14
+ dataset: "ability.cov",
15
+ title: "Ability and Intelligence Tests",
16
+ rows: 6,
17
+ cols: 8,
18
+ n_binary: 0,
19
+ n_character: 0,
20
+ n_factor: 0,
21
+ n_logical: 0,
22
+ n_numeric: 8,
23
+ csv: "https://vincentarelbundock.github.io/Rdatasets/csv/datasets/ability.cov.csv",
24
+ doc: "https://vincentarelbundock.github.io/Rdatasets/doc/datasets/ability.cov.html"
25
+ },
26
+ {
27
+ package: "datasets",
28
+ dataset: "WWWusage",
29
+ title: "Internet Usage per Minute",
30
+ rows: 100,
31
+ cols: 2,
32
+ n_binary: 0,
33
+ n_character: 0,
34
+ n_factor: 0,
35
+ n_logical: 0,
36
+ n_numeric: 2,
37
+ csv: "https://vincentarelbundock.github.io/Rdatasets/csv/datasets/WWWusage.csv",
38
+ doc: "https://vincentarelbundock.github.io/Rdatasets/doc/datasets/WWWusage.html"
39
+ }
40
+ ],
41
+ [
42
+ records.size,
43
+ records[0].to_h,
44
+ records[-1].to_h
45
+ ])
46
+ end
47
+
48
+ test("without package_name") do
49
+ records = @dataset.each.to_a
50
+ assert_equal([
51
+ 1714,
52
+ {
53
+ package: "AER",
54
+ dataset: "Affairs",
55
+ title: "Fair's Extramarital Affairs Data",
56
+ rows: 601,
57
+ cols: 9,
58
+ n_binary: 2,
59
+ n_character: 0,
60
+ n_factor: 2,
61
+ n_logical: 0,
62
+ n_numeric: 7,
63
+ csv: "https://vincentarelbundock.github.io/Rdatasets/csv/AER/Affairs.csv",
64
+ doc: "https://vincentarelbundock.github.io/Rdatasets/doc/AER/Affairs.html"
65
+ },
66
+ {
67
+ package: "vcd",
68
+ dataset: "WomenQueue",
69
+ title: "Women in Queues",
70
+ rows: 11,
71
+ cols: 2,
72
+ n_binary: 0,
73
+ n_character: 0,
74
+ n_factor: 1,
75
+ n_logical: 0,
76
+ n_numeric: 1,
77
+ csv: "https://vincentarelbundock.github.io/Rdatasets/csv/vcd/WomenQueue.csv",
78
+ doc: "https://vincentarelbundock.github.io/Rdatasets/doc/vcd/WomenQueue.html"
79
+ },
80
+ ],
81
+ [
82
+ records.size,
83
+ records[0].to_h,
84
+ records[-1].to_h
85
+ ])
86
+ end
87
+ end
88
+ end
89
+
90
+ sub_test_case("Rdatasets") do
91
+ sub_test_case("datasets") do
92
+ sub_test_case("AirPassengers") do
93
+ def setup
94
+ @dataset = Datasets::Rdatasets.new("datasets", "AirPassengers")
95
+ end
96
+
97
+ test("#each") do
98
+ records = @dataset.each.to_a
99
+ assert_equal([
100
+ 144,
101
+ { time: 1949, value: 112 },
102
+ { time: 1960.91666666667, value: 432 },
103
+ ],
104
+ [
105
+ records.size,
106
+ records[0],
107
+ records[-1]
108
+ ])
109
+ end
110
+
111
+ test("#metadata.id") do
112
+ assert_equal("rdatasets-datasets-AirPassengers", @dataset.metadata.id)
113
+ end
114
+
115
+ test("#metadata.description") do
116
+ description = @dataset.metadata.description
117
+ assert do
118
+ description.include?("Monthly Airline Passenger Numbers 1949-1960")
119
+ end
120
+ end
121
+ end
122
+
123
+ test("invalid dataset name") do
124
+ assert_raise(ArgumentError) do
125
+ Datasets::Rdatasets.new("datasets", "invalid datasets name")
126
+ end
127
+ end
128
+ end
129
+
130
+ test("invalid package name") do
131
+ assert_raise(ArgumentError) do
132
+ Datasets::Rdatasets.new("invalid package name", "AirPassengers")
133
+ end
134
+ end
135
+ end
136
+ end
@@ -0,0 +1,97 @@
1
+ class SeabornDataTest < Test::Unit::TestCase
2
+ sub_test_case("fmri") do
3
+ def setup
4
+ @dataset = Datasets::SeabornData.new("fmri")
5
+ end
6
+
7
+ def test_each
8
+ records = @dataset.each.to_a
9
+ assert_equal([
10
+ 1064,
11
+ {
12
+ subject: "s5",
13
+ timepoint: 14,
14
+ event: "stim",
15
+ region: "parietal",
16
+ signal: -0.0808829319505
17
+ },
18
+ {
19
+ subject: "s0",
20
+ timepoint: 0,
21
+ event: "cue",
22
+ region: "parietal",
23
+ signal: -0.00689923478092
24
+ }
25
+ ],
26
+ [
27
+ records.size,
28
+ records[1].to_h,
29
+ records[-1].to_h
30
+ ])
31
+ end
32
+ end
33
+
34
+ sub_test_case("flights") do
35
+ def setup
36
+ @dataset = Datasets::SeabornData.new("flights")
37
+ end
38
+
39
+ def test_each
40
+ records = @dataset.each.to_a
41
+ assert_equal([
42
+ 144,
43
+ {
44
+ year: 1949,
45
+ month: "Feb",
46
+ passengers: 118
47
+ },
48
+ {
49
+ year: 1960,
50
+ month: "Dec",
51
+ passengers: 432
52
+ }
53
+ ],
54
+ [
55
+ records.size,
56
+ records[1].to_h,
57
+ records[-1].to_h
58
+ ])
59
+ end
60
+ end
61
+
62
+ sub_test_case("penguins") do
63
+ def setup
64
+ @dataset = Datasets::SeabornData.new("penguins")
65
+ end
66
+
67
+ def test_each
68
+ records = @dataset.each.to_a
69
+ assert_equal([
70
+ 344,
71
+ {
72
+ species: "Adelie",
73
+ island: "Torgersen",
74
+ bill_length_mm: 39.5,
75
+ bill_depth_mm: 17.4,
76
+ flipper_length_mm: 186,
77
+ body_mass_g: 3800,
78
+ sex: "Female"
79
+ },
80
+ {
81
+ species: "Gentoo",
82
+ island: "Biscoe",
83
+ bill_length_mm: 49.9,
84
+ bill_depth_mm: 16.1,
85
+ flipper_length_mm: 213,
86
+ body_mass_g: 5400,
87
+ sex: "Male"
88
+ }
89
+ ],
90
+ [
91
+ records.size,
92
+ records[1].to_h,
93
+ records[-1].to_h
94
+ ])
95
+ end
96
+ end
97
+ end
@@ -0,0 +1,48 @@
1
+ class SudachiSynonymDictionaryTest < Test::Unit::TestCase
2
+ def setup
3
+ @dataset = Datasets::SudachiSynonymDictionary.new
4
+ end
5
+
6
+ test('#each') do
7
+ records = @dataset.each.to_a
8
+ assert_equal([
9
+ 61335,
10
+ {
11
+ group_id: "000001",
12
+ is_noun: true,
13
+ expansion_type: :always,
14
+ lexeme_id: 1,
15
+ form_type: :typical,
16
+ acronym_type: :typical,
17
+ variant_type: :typical,
18
+ categories: [],
19
+ notation: "曖昧",
20
+ },
21
+ {
22
+ group_id: "023705",
23
+ is_noun: true,
24
+ expansion_type: :always,
25
+ lexeme_id: 1,
26
+ form_type: :typical,
27
+ acronym_type: :alphabet,
28
+ variant_type: :typical,
29
+ categories: ["単位"],
30
+ notation: "GB",
31
+ },
32
+ ],
33
+ [
34
+ records.size,
35
+ records[0].to_h,
36
+ records[-1].to_h,
37
+ ])
38
+ end
39
+
40
+ sub_test_case('#metadata') do
41
+ test('#description') do
42
+ description = @dataset.metadata.description
43
+ assert do
44
+ description.start_with?('# Sudachi 同義語辞書')
45
+ end
46
+ end
47
+ end
48
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-datasets
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - tomisuker
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2020-02-04 00:00:00.000000000 Z
12
+ date: 2021-07-13 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: csv
@@ -25,6 +25,20 @@ dependencies:
25
25
  - - ">="
26
26
  - !ruby/object:Gem::Version
27
27
  version: 3.0.5
28
+ - !ruby/object:Gem::Dependency
29
+ name: rexml
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ type: :runtime
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
28
42
  - !ruby/object:Gem::Dependency
29
43
  name: rubyzip
30
44
  requirement: !ruby/object:Gem::Requirement
@@ -129,9 +143,13 @@ files:
129
143
  - lib/datasets.rb
130
144
  - lib/datasets/adult.rb
131
145
  - lib/datasets/cifar.rb
146
+ - lib/datasets/cldr-plurals.rb
147
+ - lib/datasets/communities.rb
132
148
  - lib/datasets/dataset.rb
133
149
  - lib/datasets/dictionary.rb
134
150
  - lib/datasets/downloader.rb
151
+ - lib/datasets/e-stat-japan.rb
152
+ - lib/datasets/error.rb
135
153
  - lib/datasets/fashion-mnist.rb
136
154
  - lib/datasets/hepatitis.rb
137
155
  - lib/datasets/iris.rb
@@ -140,9 +158,14 @@ files:
140
158
  - lib/datasets/metadata.rb
141
159
  - lib/datasets/mnist.rb
142
160
  - lib/datasets/mushroom.rb
161
+ - lib/datasets/penguins.rb
143
162
  - lib/datasets/penn-treebank.rb
144
163
  - lib/datasets/postal-code-japan.rb
164
+ - lib/datasets/rdatasets.rb
165
+ - lib/datasets/seaborn-data.rb
166
+ - lib/datasets/sudachi-synonym-dictionary.rb
145
167
  - lib/datasets/table.rb
168
+ - lib/datasets/tar-gz-readable.rb
146
169
  - lib/datasets/version.rb
147
170
  - lib/datasets/wikipedia.rb
148
171
  - lib/datasets/wine.rb
@@ -151,7 +174,12 @@ files:
151
174
  - test/run-test.rb
152
175
  - test/test-adult.rb
153
176
  - test/test-cifar.rb
177
+ - test/test-cldr-plurals.rb
178
+ - test/test-communities.rb
179
+ - test/test-dataset.rb
154
180
  - test/test-dictionary.rb
181
+ - test/test-downloader.rb
182
+ - test/test-e-stat-japan.rb
155
183
  - test/test-fashion-mnist.rb
156
184
  - test/test-hepatitis.rb
157
185
  - test/test-iris.rb
@@ -159,8 +187,12 @@ files:
159
187
  - test/test-libsvm.rb
160
188
  - test/test-mnist.rb
161
189
  - test/test-mushroom.rb
190
+ - test/test-penguins.rb
162
191
  - test/test-penn-treebank.rb
163
192
  - test/test-postal-code-japan.rb
193
+ - test/test-rdatasets.rb
194
+ - test/test-seaborn-data.rb
195
+ - test/test-sudachi-synonym-dictionary.rb
164
196
  - test/test-table.rb
165
197
  - test/test-wikipedia.rb
166
198
  - test/test-wine.rb
@@ -183,26 +215,34 @@ required_rubygems_version: !ruby/object:Gem::Requirement
183
215
  - !ruby/object:Gem::Version
184
216
  version: '0'
185
217
  requirements: []
186
- rubyforge_project:
187
- rubygems_version: 2.7.6.2
218
+ rubygems_version: 3.3.0.dev
188
219
  signing_key:
189
220
  specification_version: 4
190
221
  summary: Red Datasets provides classes that provide common datasets such as iris dataset.
191
222
  test_files:
192
- - test/test-wine.rb
193
- - test/test-iris.rb
194
- - test/test-wikipedia.rb
195
- - test/test-mnist.rb
196
223
  - test/helper.rb
197
- - test/test-penn-treebank.rb
198
224
  - test/run-test.rb
199
- - test/test-table.rb
200
- - test/test-fashion-mnist.rb
225
+ - test/test-adult.rb
201
226
  - test/test-cifar.rb
227
+ - test/test-cldr-plurals.rb
228
+ - test/test-communities.rb
229
+ - test/test-dataset.rb
202
230
  - test/test-dictionary.rb
203
- - test/test-mushroom.rb
204
- - test/test-libsvm-dataset-list.rb
231
+ - test/test-downloader.rb
232
+ - test/test-e-stat-japan.rb
233
+ - test/test-fashion-mnist.rb
205
234
  - test/test-hepatitis.rb
206
- - test/test-adult.rb
207
- - test/test-postal-code-japan.rb
235
+ - test/test-iris.rb
236
+ - test/test-libsvm-dataset-list.rb
208
237
  - test/test-libsvm.rb
238
+ - test/test-mnist.rb
239
+ - test/test-mushroom.rb
240
+ - test/test-penguins.rb
241
+ - test/test-penn-treebank.rb
242
+ - test/test-postal-code-japan.rb
243
+ - test/test-rdatasets.rb
244
+ - test/test-seaborn-data.rb
245
+ - test/test-sudachi-synonym-dictionary.rb
246
+ - test/test-table.rb
247
+ - test/test-wikipedia.rb
248
+ - test/test-wine.rb