red-datasets 0.0.7 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,69 @@
# Checks the first record exposed by Datasets::PostalCodeJapan for each
# value of the +reading+ option exercised below: :lowercase, :uppercase
# and :romaji.
class PostalCodeJapanTest < Test::Unit::TestCase
  sub_test_case(":reading") do
    # Builds the expected first record as a Hash.
    #
    # The defaults are exactly the values asserted for reading: :lowercase.
    # +overrides+ replaces only the fields that differ for another reading;
    # Hash#merge keeps the key order of the defaults.
    def expected_first_record(overrides = {})
      {
        organization_code: "01101",
        old_postal_code: "060",
        postal_code: "0600000",
        prefecture_reading: "ホッカイドウ",
        city_reading: "サッポロシチュウオウク",
        address_reading: "イカニケイサイガナイバアイ",
        prefecture: "北海道",
        city: "札幌市中央区",
        address: "以下に掲載がない場合",
        have_multiple_postal_codes: false,
        have_address_number_per_koaza: false,
        have_chome: false,
        postal_code_is_shared: false,
        changed: false,
        change_reason: nil,
      }.merge(overrides)
    end

    test(":lowercase") do
      dataset = Datasets::PostalCodeJapan.new(reading: :lowercase)
      assert_equal(expected_first_record, dataset.first.to_h)
    end

    test(":uppercase") do
      dataset = Datasets::PostalCodeJapan.new(reading: :uppercase)
      # Only the prefecture and city readings differ from :lowercase: the
      # expected strings use full-size kana where :lowercase has small ones.
      expected = expected_first_record(
        prefecture_reading: "ホツカイドウ",
        city_reading: "サツポロシチユウオウク"
      )
      assert_equal(expected, dataset.first.to_h)
    end

    test(":romaji") do
      dataset = Datasets::PostalCodeJapan.new(reading: :romaji)
      # For :romaji the two code fields are expected to be nil, the readings
      # are Latin-script strings, and the city name contains a separator.
      expected = expected_first_record(
        organization_code: nil,
        old_postal_code: nil,
        prefecture_reading: "HOKKAIDO",
        city_reading: "SAPPORO SHI CHUO KU",
        address_reading: "IKANIKEISAIGANAIBAAI",
        city: "札幌市 中央区"
      )
      assert_equal(expected, dataset.first.to_h)
    end
  end
end
@@ -0,0 +1,136 @@
# Tests for the Rdatasets support: the Datasets::RdatasetsList catalogue and
# a single dataset loaded through Datasets::Rdatasets.
class RdatasetsTest < Test::Unit::TestCase
  sub_test_case("RdatasetsList") do
    def setup
      @dataset = Datasets::RdatasetsList.new
    end

    # Each test compares [record count, first record, last record] so that
    # one assertion covers both the size and the two ends of the listing.
    sub_test_case("#each") do
      test("with package_name") do
        records = @dataset.filter(package: "datasets").to_a
        expected_first = {
          package: "datasets",
          dataset: "ability.cov",
          title: "Ability and Intelligence Tests",
          rows: 6,
          cols: 8,
          n_binary: 0,
          n_character: 0,
          n_factor: 0,
          n_logical: 0,
          n_numeric: 8,
          csv: "https://vincentarelbundock.github.io/Rdatasets/csv/datasets/ability.cov.csv",
          doc: "https://vincentarelbundock.github.io/Rdatasets/doc/datasets/ability.cov.html"
        }
        expected_last = {
          package: "datasets",
          dataset: "WWWusage",
          title: "Internet Usage per Minute",
          rows: 100,
          cols: 2,
          n_binary: 0,
          n_character: 0,
          n_factor: 0,
          n_logical: 0,
          n_numeric: 2,
          csv: "https://vincentarelbundock.github.io/Rdatasets/csv/datasets/WWWusage.csv",
          doc: "https://vincentarelbundock.github.io/Rdatasets/doc/datasets/WWWusage.html"
        }
        actual = [records.size, records.first.to_h, records.last.to_h]
        assert_equal([84, expected_first, expected_last], actual)
      end

      test("without package_name") do
        records = @dataset.each.to_a
        expected_first = {
          package: "AER",
          dataset: "Affairs",
          title: "Fair's Extramarital Affairs Data",
          rows: 601,
          cols: 9,
          n_binary: 2,
          n_character: 0,
          n_factor: 2,
          n_logical: 0,
          n_numeric: 7,
          csv: "https://vincentarelbundock.github.io/Rdatasets/csv/AER/Affairs.csv",
          doc: "https://vincentarelbundock.github.io/Rdatasets/doc/AER/Affairs.html"
        }
        expected_last = {
          package: "vcd",
          dataset: "WomenQueue",
          title: "Women in Queues",
          rows: 11,
          cols: 2,
          n_binary: 0,
          n_character: 0,
          n_factor: 1,
          n_logical: 0,
          n_numeric: 1,
          csv: "https://vincentarelbundock.github.io/Rdatasets/csv/vcd/WomenQueue.csv",
          doc: "https://vincentarelbundock.github.io/Rdatasets/doc/vcd/WomenQueue.html"
        }
        actual = [records.size, records.first.to_h, records.last.to_h]
        assert_equal([1478, expected_first, expected_last], actual)
      end
    end
  end

  sub_test_case("Rdatasets") do
    sub_test_case("datasets") do
      sub_test_case("AirPassengers") do
        def setup
          @dataset = Datasets::Rdatasets.new("datasets", "AirPassengers")
        end

        test("#each") do
          records = @dataset.each.to_a
          # Records are compared directly (no to_h) against plain hashes.
          expected = [
            144,
            { time: 1949, value: 112 },
            { time: 1960.91666666667, value: 432 },
          ]
          assert_equal(expected, [records.size, records.first, records.last])
        end

        test("#metadata.id") do
          assert_equal("rdatasets-datasets-AirPassengers", @dataset.metadata.id)
        end

        test("#metadata.description") do
          description = @dataset.metadata.description
          # Only a substring is checked, not the whole description text.
          assert { description.include?("Monthly Airline Passenger Numbers 1949-1960") }
        end
      end

      # An unknown dataset name inside a valid package must be rejected.
      test("invalid dataset name") do
        assert_raise(ArgumentError) { Datasets::Rdatasets.new("datasets", "invalid datasets name") }
      end
    end

    # An unknown package name must be rejected as well.
    test("invalid package name") do
      assert_raise(ArgumentError) { Datasets::Rdatasets.new("invalid package name", "AirPassengers") }
    end
  end
end
data/test/test-table.rb CHANGED
@@ -3,9 +3,129 @@ class TableTest < Test::Unit::TestCase
3
3
  @table = Datasets::Iris.new.to_table
4
4
  end
5
5
 
6
- test("#[]") do
7
- assert_equal([1.4, 1.4, 1.3, 1.5, 1.4],
8
- @table[:petal_length].first(5))
6
+ test("#n_columns") do
7
+ assert_equal(5, @table.n_columns)
8
+ end
9
+
10
+ test("#n_rows") do
11
+ assert_equal(150, @table.n_rows)
12
+ end
13
+
14
+ test("#column_names") do
15
+ assert_equal([
16
+ :sepal_length,
17
+ :sepal_width,
18
+ :petal_length,
19
+ :petal_width,
20
+ :label,
21
+ ],
22
+ @table.column_names)
23
+ end
24
+
25
+ test("#each") do
26
+ shorten_hash = {}
27
+ @table.each do |name, values|
28
+ shorten_hash[name] = values.first(5)
29
+ end
30
+ assert_equal({
31
+ :label => ["Iris-setosa"] * 5,
32
+ :petal_length => [1.4, 1.4, 1.3, 1.5, 1.4],
33
+ :petal_width => [0.2, 0.2, 0.2, 0.2, 0.2],
34
+ :sepal_length => [5.1, 4.9, 4.7, 4.6, 5.0],
35
+ :sepal_width => [3.5, 3.0, 3.2, 3.1, 3.6],
36
+ },
37
+ shorten_hash)
38
+ end
39
+
40
+ test("#each_column") do
41
+ shorten_hash = {}
42
+ @table.each_column do |name, values|
43
+ shorten_hash[name] = values.first(5)
44
+ end
45
+ assert_equal({
46
+ :label => ["Iris-setosa"] * 5,
47
+ :petal_length => [1.4, 1.4, 1.3, 1.5, 1.4],
48
+ :petal_width => [0.2, 0.2, 0.2, 0.2, 0.2],
49
+ :sepal_length => [5.1, 4.9, 4.7, 4.6, 5.0],
50
+ :sepal_width => [3.5, 3.0, 3.2, 3.1, 3.6],
51
+ },
52
+ shorten_hash)
53
+ end
54
+
55
+ test("#each_record") do
56
+ records = []
57
+ @table.each_record do |record|
58
+ records << record
59
+ break if records.size == 3
60
+ end
61
+ assert_equal([
62
+ {
63
+ label: "Iris-setosa",
64
+ petal_length: 1.4,
65
+ petal_width: 0.2,
66
+ sepal_length: 5.1,
67
+ sepal_width: 3.5,
68
+ },
69
+ {
70
+ label: "Iris-setosa",
71
+ petal_length: 1.4,
72
+ petal_width: 0.2,
73
+ sepal_length: 4.9,
74
+ sepal_width: 3.0,
75
+ },
76
+ {
77
+ label: "Iris-setosa",
78
+ petal_length: 1.3,
79
+ petal_width: 0.2,
80
+ sepal_length: 4.7,
81
+ sepal_width: 3.2,
82
+ },
83
+ ],
84
+ records.collect(&:to_h))
85
+ end
86
+
87
+ sub_test_case("#find_record") do
88
+ test("positive") do
89
+ assert_equal({
90
+ label: "Iris-setosa",
91
+ petal_length: 1.4,
92
+ petal_width: 0.2,
93
+ sepal_length: 4.9,
94
+ sepal_width: 3.0,
95
+ },
96
+ @table.find_record(1).to_h)
97
+ end
98
+
99
+ test("positive - over") do
100
+ assert_nil(@table.find_record(151))
101
+ end
102
+
103
+ test("negative") do
104
+ assert_equal({
105
+ label: "Iris-virginica",
106
+ petal_length: 5.1,
107
+ petal_width: 1.8,
108
+ sepal_length: 5.9,
109
+ sepal_width: 3.0,
110
+ },
111
+ @table.find_record(-1).to_h)
112
+ end
113
+
114
+ test("negative - over") do
115
+ assert_nil(@table.find_record(-151))
116
+ end
117
+ end
118
+
119
+ sub_test_case("#[]") do
120
+ test("index") do
121
+ assert_equal([1.4, 1.4, 1.3, 1.5, 1.4],
122
+ @table[2].first(5))
123
+ end
124
+
125
+ test("name") do
126
+ assert_equal([1.4, 1.4, 1.3, 1.5, 1.4],
127
+ @table[:petal_length].first(5))
128
+ end
9
129
  end
10
130
 
11
131
  test("#dictionary_encode") do
@@ -58,21 +178,6 @@ class TableTest < Test::Unit::TestCase
58
178
  end
59
179
  end
60
180
 
61
- test("#each") do
62
- shorten_hash = {}
63
- @table.each do |name, values|
64
- shorten_hash[name] = values.first(5)
65
- end
66
- assert_equal({
67
- :label => ["Iris-setosa"] * 5,
68
- :petal_length => [1.4, 1.4, 1.3, 1.5, 1.4],
69
- :petal_width => [0.2, 0.2, 0.2, 0.2, 0.2],
70
- :sepal_length => [5.1, 4.9, 4.7, 4.6, 5.0],
71
- :sepal_width => [3.5, 3.0, 3.2, 3.1, 3.6],
72
- },
73
- shorten_hash)
74
- end
75
-
76
181
  test("#to_h") do
77
182
  shorten_hash = {}
78
183
  @table.to_h.each do |name, values|
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-datasets
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - tomisuker
@@ -9,8 +9,50 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2018-11-20 00:00:00.000000000 Z
12
+ date: 2021-06-03 00:00:00.000000000 Z
13
13
  dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: csv
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ">="
19
+ - !ruby/object:Gem::Version
20
+ version: 3.0.5
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ version: 3.0.5
28
+ - !ruby/object:Gem::Dependency
29
+ name: rexml
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ type: :runtime
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ - !ruby/object:Gem::Dependency
43
+ name: rubyzip
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ type: :runtime
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
14
56
  - !ruby/object:Gem::Dependency
15
57
  name: bundler
16
58
  requirement: !ruby/object:Gem::Requirement
@@ -84,7 +126,7 @@ dependencies:
84
126
  description: 'You can use datasets easily because you can access each dataset with
85
127
  multiple ways such as `#each` and Apache Arrow Record Batch.
86
128
 
87
- '
129
+ '
88
130
  email:
89
131
  - tomisuker16@gmail.com
90
132
  - kou@clear-code.com
@@ -101,15 +143,27 @@ files:
101
143
  - lib/datasets.rb
102
144
  - lib/datasets/adult.rb
103
145
  - lib/datasets/cifar.rb
146
+ - lib/datasets/cldr-plurals.rb
147
+ - lib/datasets/communities.rb
104
148
  - lib/datasets/dataset.rb
105
149
  - lib/datasets/dictionary.rb
106
150
  - lib/datasets/downloader.rb
151
+ - lib/datasets/e-stat-japan.rb
152
+ - lib/datasets/error.rb
107
153
  - lib/datasets/fashion-mnist.rb
154
+ - lib/datasets/hepatitis.rb
108
155
  - lib/datasets/iris.rb
156
+ - lib/datasets/libsvm-dataset-list.rb
157
+ - lib/datasets/libsvm.rb
109
158
  - lib/datasets/metadata.rb
110
159
  - lib/datasets/mnist.rb
160
+ - lib/datasets/mushroom.rb
161
+ - lib/datasets/penguins.rb
111
162
  - lib/datasets/penn-treebank.rb
163
+ - lib/datasets/postal-code-japan.rb
164
+ - lib/datasets/rdatasets.rb
112
165
  - lib/datasets/table.rb
166
+ - lib/datasets/tar_gz_readable.rb
113
167
  - lib/datasets/version.rb
114
168
  - lib/datasets/wikipedia.rb
115
169
  - lib/datasets/wine.rb
@@ -118,11 +172,23 @@ files:
118
172
  - test/run-test.rb
119
173
  - test/test-adult.rb
120
174
  - test/test-cifar.rb
175
+ - test/test-cldr-plurals.rb
176
+ - test/test-communities.rb
177
+ - test/test-dataset.rb
121
178
  - test/test-dictionary.rb
179
+ - test/test-downloader.rb
180
+ - test/test-e-stat-japan.rb
122
181
  - test/test-fashion-mnist.rb
182
+ - test/test-hepatitis.rb
123
183
  - test/test-iris.rb
184
+ - test/test-libsvm-dataset-list.rb
185
+ - test/test-libsvm.rb
124
186
  - test/test-mnist.rb
187
+ - test/test-mushroom.rb
188
+ - test/test-penguins.rb
125
189
  - test/test-penn-treebank.rb
190
+ - test/test-postal-code-japan.rb
191
+ - test/test-rdatasets.rb
126
192
  - test/test-table.rb
127
193
  - test/test-wikipedia.rb
128
194
  - test/test-wine.rb
@@ -145,21 +211,32 @@ required_rubygems_version: !ruby/object:Gem::Requirement
145
211
  - !ruby/object:Gem::Version
146
212
  version: '0'
147
213
  requirements: []
148
- rubyforge_project:
149
- rubygems_version: 3.0.0.beta2
214
+ rubygems_version: 3.3.0.dev
150
215
  signing_key:
151
216
  specification_version: 4
152
217
  summary: Red Datasets provides classes that provide common datasets such as iris dataset.
153
218
  test_files:
154
- - test/test-wine.rb
219
+ - test/helper.rb
155
220
  - test/run-test.rb
221
+ - test/test-adult.rb
156
222
  - test/test-cifar.rb
223
+ - test/test-cldr-plurals.rb
224
+ - test/test-communities.rb
225
+ - test/test-dataset.rb
226
+ - test/test-dictionary.rb
227
+ - test/test-downloader.rb
228
+ - test/test-e-stat-japan.rb
157
229
  - test/test-fashion-mnist.rb
158
- - test/test-wikipedia.rb
230
+ - test/test-hepatitis.rb
159
231
  - test/test-iris.rb
160
- - test/helper.rb
232
+ - test/test-libsvm-dataset-list.rb
233
+ - test/test-libsvm.rb
161
234
  - test/test-mnist.rb
162
- - test/test-table.rb
163
- - test/test-adult.rb
235
+ - test/test-mushroom.rb
236
+ - test/test-penguins.rb
164
237
  - test/test-penn-treebank.rb
165
- - test/test-dictionary.rb
238
+ - test/test-postal-code-japan.rb
239
+ - test/test-rdatasets.rb
240
+ - test/test-table.rb
241
+ - test/test-wikipedia.rb
242
+ - test/test-wine.rb