red-datasets 0.0.7 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,69 @@
1
+ class PostalCodeJapanTest < Test::Unit::TestCase
2
+ sub_test_case(":reading") do
3
+ test(":lowercase") do
4
+ dataset = Datasets::PostalCodeJapan.new(reading: :lowercase)
5
+ assert_equal({
6
+ organization_code: "01101",
7
+ old_postal_code: "060",
8
+ postal_code: "0600000",
9
+ prefecture_reading: "ホッカイドウ",
10
+ city_reading: "サッポロシチュウオウク",
11
+ address_reading: "イカニケイサイガナイバアイ",
12
+ prefecture: "北海道",
13
+ city: "札幌市中央区",
14
+ address: "以下に掲載がない場合",
15
+ have_multiple_postal_codes: false,
16
+ have_address_number_per_koaza: false,
17
+ have_chome: false,
18
+ postal_code_is_shared: false,
19
+ changed: false,
20
+ change_reason: nil,
21
+ },
22
+ dataset.first.to_h)
23
+ end
24
+
25
+ test(":uppercase") do
26
+ dataset = Datasets::PostalCodeJapan.new(reading: :uppercase)
27
+ assert_equal({
28
+ organization_code: "01101",
29
+ old_postal_code: "060",
30
+ postal_code: "0600000",
31
+ prefecture_reading: "ホツカイドウ",
32
+ city_reading: "サツポロシチユウオウク",
33
+ address_reading: "イカニケイサイガナイバアイ",
34
+ prefecture: "北海道",
35
+ city: "札幌市中央区",
36
+ address: "以下に掲載がない場合",
37
+ have_multiple_postal_codes: false,
38
+ have_address_number_per_koaza: false,
39
+ have_chome: false,
40
+ postal_code_is_shared: false,
41
+ changed: false,
42
+ change_reason: nil,
43
+ },
44
+ dataset.first.to_h)
45
+ end
46
+
47
+ test(":romaji") do
48
+ dataset = Datasets::PostalCodeJapan.new(reading: :romaji)
49
+ assert_equal({
50
+ organization_code: nil,
51
+ old_postal_code: nil,
52
+ postal_code: "0600000",
53
+ prefecture_reading: "HOKKAIDO",
54
+ city_reading: "SAPPORO SHI CHUO KU",
55
+ address_reading: "IKANIKEISAIGANAIBAAI",
56
+ prefecture: "北海道",
57
+ city: "札幌市 中央区",
58
+ address: "以下に掲載がない場合",
59
+ have_multiple_postal_codes: false,
60
+ have_address_number_per_koaza: false,
61
+ have_chome: false,
62
+ postal_code_is_shared: false,
63
+ changed: false,
64
+ change_reason: nil,
65
+ },
66
+ dataset.first.to_h)
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,136 @@
1
+ class RdatasetsTest < Test::Unit::TestCase
2
+ sub_test_case("RdatasetsList") do
3
+ def setup
4
+ @dataset = Datasets::RdatasetsList.new
5
+ end
6
+
7
+ sub_test_case("#each") do
8
+ test("with package_name") do
9
+ records = @dataset.filter(package: "datasets").to_a
10
+ assert_equal([
11
+ 84,
12
+ {
13
+ package: "datasets",
14
+ dataset: "ability.cov",
15
+ title: "Ability and Intelligence Tests",
16
+ rows: 6,
17
+ cols: 8,
18
+ n_binary: 0,
19
+ n_character: 0,
20
+ n_factor: 0,
21
+ n_logical: 0,
22
+ n_numeric: 8,
23
+ csv: "https://vincentarelbundock.github.io/Rdatasets/csv/datasets/ability.cov.csv",
24
+ doc: "https://vincentarelbundock.github.io/Rdatasets/doc/datasets/ability.cov.html"
25
+ },
26
+ {
27
+ package: "datasets",
28
+ dataset: "WWWusage",
29
+ title: "Internet Usage per Minute",
30
+ rows: 100,
31
+ cols: 2,
32
+ n_binary: 0,
33
+ n_character: 0,
34
+ n_factor: 0,
35
+ n_logical: 0,
36
+ n_numeric: 2,
37
+ csv: "https://vincentarelbundock.github.io/Rdatasets/csv/datasets/WWWusage.csv",
38
+ doc: "https://vincentarelbundock.github.io/Rdatasets/doc/datasets/WWWusage.html"
39
+ }
40
+ ],
41
+ [
42
+ records.size,
43
+ records[0].to_h,
44
+ records[-1].to_h
45
+ ])
46
+ end
47
+
48
+ test("without package_name") do
49
+ records = @dataset.each.to_a
50
+ assert_equal([
51
+ 1478,
52
+ {
53
+ package: "AER",
54
+ dataset: "Affairs",
55
+ title: "Fair's Extramarital Affairs Data",
56
+ rows: 601,
57
+ cols: 9,
58
+ n_binary: 2,
59
+ n_character: 0,
60
+ n_factor: 2,
61
+ n_logical: 0,
62
+ n_numeric: 7,
63
+ csv: "https://vincentarelbundock.github.io/Rdatasets/csv/AER/Affairs.csv",
64
+ doc: "https://vincentarelbundock.github.io/Rdatasets/doc/AER/Affairs.html"
65
+ },
66
+ {
67
+ package: "vcd",
68
+ dataset: "WomenQueue",
69
+ title: "Women in Queues",
70
+ rows: 11,
71
+ cols: 2,
72
+ n_binary: 0,
73
+ n_character: 0,
74
+ n_factor: 1,
75
+ n_logical: 0,
76
+ n_numeric: 1,
77
+ csv: "https://vincentarelbundock.github.io/Rdatasets/csv/vcd/WomenQueue.csv",
78
+ doc: "https://vincentarelbundock.github.io/Rdatasets/doc/vcd/WomenQueue.html"
79
+ },
80
+ ],
81
+ [
82
+ records.size,
83
+ records[0].to_h,
84
+ records[-1].to_h
85
+ ])
86
+ end
87
+ end
88
+ end
89
+
90
+ sub_test_case("Rdatasets") do
91
+ sub_test_case("datasets") do
92
+ sub_test_case("AirPassengers") do
93
+ def setup
94
+ @dataset = Datasets::Rdatasets.new("datasets", "AirPassengers")
95
+ end
96
+
97
+ test("#each") do
98
+ records = @dataset.each.to_a
99
+ assert_equal([
100
+ 144,
101
+ { time: 1949, value: 112 },
102
+ { time: 1960.91666666667, value: 432 },
103
+ ],
104
+ [
105
+ records.size,
106
+ records[0],
107
+ records[-1]
108
+ ])
109
+ end
110
+
111
+ test("#metadata.id") do
112
+ assert_equal("rdatasets-datasets-AirPassengers", @dataset.metadata.id)
113
+ end
114
+
115
+ test("#metadata.description") do
116
+ description = @dataset.metadata.description
117
+ assert do
118
+ description.include?("Monthly Airline Passenger Numbers 1949-1960")
119
+ end
120
+ end
121
+ end
122
+
123
+ test("invalid dataset name") do
124
+ assert_raise(ArgumentError) do
125
+ Datasets::Rdatasets.new("datasets", "invalid datasets name")
126
+ end
127
+ end
128
+ end
129
+
130
+ test("invalid package name") do
131
+ assert_raise(ArgumentError) do
132
+ Datasets::Rdatasets.new("invalid package name", "AirPassengers")
133
+ end
134
+ end
135
+ end
136
+ end
data/test/test-table.rb CHANGED
@@ -3,9 +3,129 @@ class TableTest < Test::Unit::TestCase
3
3
  @table = Datasets::Iris.new.to_table
4
4
  end
5
5
 
6
- test("#[]") do
7
- assert_equal([1.4, 1.4, 1.3, 1.5, 1.4],
8
- @table[:petal_length].first(5))
6
+ test("#n_columns") do
7
+ assert_equal(5, @table.n_columns)
8
+ end
9
+
10
+ test("#n_rows") do
11
+ assert_equal(150, @table.n_rows)
12
+ end
13
+
14
+ test("#column_names") do
15
+ assert_equal([
16
+ :sepal_length,
17
+ :sepal_width,
18
+ :petal_length,
19
+ :petal_width,
20
+ :label,
21
+ ],
22
+ @table.column_names)
23
+ end
24
+
25
+ test("#each") do
26
+ shorten_hash = {}
27
+ @table.each do |name, values|
28
+ shorten_hash[name] = values.first(5)
29
+ end
30
+ assert_equal({
31
+ :label => ["Iris-setosa"] * 5,
32
+ :petal_length => [1.4, 1.4, 1.3, 1.5, 1.4],
33
+ :petal_width => [0.2, 0.2, 0.2, 0.2, 0.2],
34
+ :sepal_length => [5.1, 4.9, 4.7, 4.6, 5.0],
35
+ :sepal_width => [3.5, 3.0, 3.2, 3.1, 3.6],
36
+ },
37
+ shorten_hash)
38
+ end
39
+
40
+ test("#each_column") do
41
+ shorten_hash = {}
42
+ @table.each_column do |name, values|
43
+ shorten_hash[name] = values.first(5)
44
+ end
45
+ assert_equal({
46
+ :label => ["Iris-setosa"] * 5,
47
+ :petal_length => [1.4, 1.4, 1.3, 1.5, 1.4],
48
+ :petal_width => [0.2, 0.2, 0.2, 0.2, 0.2],
49
+ :sepal_length => [5.1, 4.9, 4.7, 4.6, 5.0],
50
+ :sepal_width => [3.5, 3.0, 3.2, 3.1, 3.6],
51
+ },
52
+ shorten_hash)
53
+ end
54
+
55
+ test("#each_record") do
56
+ records = []
57
+ @table.each_record do |record|
58
+ records << record
59
+ break if records.size == 3
60
+ end
61
+ assert_equal([
62
+ {
63
+ label: "Iris-setosa",
64
+ petal_length: 1.4,
65
+ petal_width: 0.2,
66
+ sepal_length: 5.1,
67
+ sepal_width: 3.5,
68
+ },
69
+ {
70
+ label: "Iris-setosa",
71
+ petal_length: 1.4,
72
+ petal_width: 0.2,
73
+ sepal_length: 4.9,
74
+ sepal_width: 3.0,
75
+ },
76
+ {
77
+ label: "Iris-setosa",
78
+ petal_length: 1.3,
79
+ petal_width: 0.2,
80
+ sepal_length: 4.7,
81
+ sepal_width: 3.2,
82
+ },
83
+ ],
84
+ records.collect(&:to_h))
85
+ end
86
+
87
+ sub_test_case("#find_record") do
88
+ test("positive") do
89
+ assert_equal({
90
+ label: "Iris-setosa",
91
+ petal_length: 1.4,
92
+ petal_width: 0.2,
93
+ sepal_length: 4.9,
94
+ sepal_width: 3.0,
95
+ },
96
+ @table.find_record(1).to_h)
97
+ end
98
+
99
+ test("positive - over") do
100
+ assert_nil(@table.find_record(151))
101
+ end
102
+
103
+ test("negative") do
104
+ assert_equal({
105
+ label: "Iris-virginica",
106
+ petal_length: 5.1,
107
+ petal_width: 1.8,
108
+ sepal_length: 5.9,
109
+ sepal_width: 3.0,
110
+ },
111
+ @table.find_record(-1).to_h)
112
+ end
113
+
114
+ test("negative - over") do
115
+ assert_nil(@table.find_record(-151))
116
+ end
117
+ end
118
+
119
+ sub_test_case("#[]") do
120
+ test("index") do
121
+ assert_equal([1.4, 1.4, 1.3, 1.5, 1.4],
122
+ @table[2].first(5))
123
+ end
124
+
125
+ test("name") do
126
+ assert_equal([1.4, 1.4, 1.3, 1.5, 1.4],
127
+ @table[:petal_length].first(5))
128
+ end
9
129
  end
10
130
 
11
131
  test("#dictionary_encode") do
@@ -58,21 +178,6 @@ class TableTest < Test::Unit::TestCase
58
178
  end
59
179
  end
60
180
 
61
- test("#each") do
62
- shorten_hash = {}
63
- @table.each do |name, values|
64
- shorten_hash[name] = values.first(5)
65
- end
66
- assert_equal({
67
- :label => ["Iris-setosa"] * 5,
68
- :petal_length => [1.4, 1.4, 1.3, 1.5, 1.4],
69
- :petal_width => [0.2, 0.2, 0.2, 0.2, 0.2],
70
- :sepal_length => [5.1, 4.9, 4.7, 4.6, 5.0],
71
- :sepal_width => [3.5, 3.0, 3.2, 3.1, 3.6],
72
- },
73
- shorten_hash)
74
- end
75
-
76
181
  test("#to_h") do
77
182
  shorten_hash = {}
78
183
  @table.to_h.each do |name, values|
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-datasets
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - tomisuker
@@ -9,8 +9,50 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2018-11-20 00:00:00.000000000 Z
12
+ date: 2021-06-03 00:00:00.000000000 Z
13
13
  dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: csv
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ">="
19
+ - !ruby/object:Gem::Version
20
+ version: 3.0.5
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ version: 3.0.5
28
+ - !ruby/object:Gem::Dependency
29
+ name: rexml
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ type: :runtime
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ - !ruby/object:Gem::Dependency
43
+ name: rubyzip
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ type: :runtime
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
14
56
  - !ruby/object:Gem::Dependency
15
57
  name: bundler
16
58
  requirement: !ruby/object:Gem::Requirement
@@ -84,7 +126,7 @@ dependencies:
84
126
  description: 'You can use datasets easily because you can access each dataset with
85
127
  multiple ways such as `#each` and Apache Arrow Record Batch.
86
128
 
87
- '
129
+ '
88
130
  email:
89
131
  - tomisuker16@gmail.com
90
132
  - kou@clear-code.com
@@ -101,15 +143,27 @@ files:
101
143
  - lib/datasets.rb
102
144
  - lib/datasets/adult.rb
103
145
  - lib/datasets/cifar.rb
146
+ - lib/datasets/cldr-plurals.rb
147
+ - lib/datasets/communities.rb
104
148
  - lib/datasets/dataset.rb
105
149
  - lib/datasets/dictionary.rb
106
150
  - lib/datasets/downloader.rb
151
+ - lib/datasets/e-stat-japan.rb
152
+ - lib/datasets/error.rb
107
153
  - lib/datasets/fashion-mnist.rb
154
+ - lib/datasets/hepatitis.rb
108
155
  - lib/datasets/iris.rb
156
+ - lib/datasets/libsvm-dataset-list.rb
157
+ - lib/datasets/libsvm.rb
109
158
  - lib/datasets/metadata.rb
110
159
  - lib/datasets/mnist.rb
160
+ - lib/datasets/mushroom.rb
161
+ - lib/datasets/penguins.rb
111
162
  - lib/datasets/penn-treebank.rb
163
+ - lib/datasets/postal-code-japan.rb
164
+ - lib/datasets/rdatasets.rb
112
165
  - lib/datasets/table.rb
166
+ - lib/datasets/tar_gz_readable.rb
113
167
  - lib/datasets/version.rb
114
168
  - lib/datasets/wikipedia.rb
115
169
  - lib/datasets/wine.rb
@@ -118,11 +172,23 @@ files:
118
172
  - test/run-test.rb
119
173
  - test/test-adult.rb
120
174
  - test/test-cifar.rb
175
+ - test/test-cldr-plurals.rb
176
+ - test/test-communities.rb
177
+ - test/test-dataset.rb
121
178
  - test/test-dictionary.rb
179
+ - test/test-downloader.rb
180
+ - test/test-e-stat-japan.rb
122
181
  - test/test-fashion-mnist.rb
182
+ - test/test-hepatitis.rb
123
183
  - test/test-iris.rb
184
+ - test/test-libsvm-dataset-list.rb
185
+ - test/test-libsvm.rb
124
186
  - test/test-mnist.rb
187
+ - test/test-mushroom.rb
188
+ - test/test-penguins.rb
125
189
  - test/test-penn-treebank.rb
190
+ - test/test-postal-code-japan.rb
191
+ - test/test-rdatasets.rb
126
192
  - test/test-table.rb
127
193
  - test/test-wikipedia.rb
128
194
  - test/test-wine.rb
@@ -145,21 +211,32 @@ required_rubygems_version: !ruby/object:Gem::Requirement
145
211
  - !ruby/object:Gem::Version
146
212
  version: '0'
147
213
  requirements: []
148
- rubyforge_project:
149
- rubygems_version: 3.0.0.beta2
214
+ rubygems_version: 3.3.0.dev
150
215
  signing_key:
151
216
  specification_version: 4
152
217
  summary: Red Datasets provides classes that provide common datasets such as iris dataset.
153
218
  test_files:
154
- - test/test-wine.rb
219
+ - test/helper.rb
155
220
  - test/run-test.rb
221
+ - test/test-adult.rb
156
222
  - test/test-cifar.rb
223
+ - test/test-cldr-plurals.rb
224
+ - test/test-communities.rb
225
+ - test/test-dataset.rb
226
+ - test/test-dictionary.rb
227
+ - test/test-downloader.rb
228
+ - test/test-e-stat-japan.rb
157
229
  - test/test-fashion-mnist.rb
158
- - test/test-wikipedia.rb
230
+ - test/test-hepatitis.rb
159
231
  - test/test-iris.rb
160
- - test/helper.rb
232
+ - test/test-libsvm-dataset-list.rb
233
+ - test/test-libsvm.rb
161
234
  - test/test-mnist.rb
162
- - test/test-table.rb
163
- - test/test-adult.rb
235
+ - test/test-mushroom.rb
236
+ - test/test-penguins.rb
164
237
  - test/test-penn-treebank.rb
165
- - test/test-dictionary.rb
238
+ - test/test-postal-code-japan.rb
239
+ - test/test-rdatasets.rb
240
+ - test/test-table.rb
241
+ - test/test-wikipedia.rb
242
+ - test/test-wine.rb