red-datasets 0.0.8 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,97 @@
1
+ class SeabornDataTest < Test::Unit::TestCase
2
+ sub_test_case("fmri") do
3
+ def setup
4
+ @dataset = Datasets::SeabornData.new("fmri")
5
+ end
6
+
7
+ def test_each
8
+ records = @dataset.each.to_a
9
+ assert_equal([
10
+ 1064,
11
+ {
12
+ subject: "s5",
13
+ timepoint: 14,
14
+ event: "stim",
15
+ region: "parietal",
16
+ signal: -0.0808829319505
17
+ },
18
+ {
19
+ subject: "s0",
20
+ timepoint: 0,
21
+ event: "cue",
22
+ region: "parietal",
23
+ signal: -0.00689923478092
24
+ }
25
+ ],
26
+ [
27
+ records.size,
28
+ records[1].to_h,
29
+ records[-1].to_h
30
+ ])
31
+ end
32
+ end
33
+
34
+ sub_test_case("flights") do
35
+ def setup
36
+ @dataset = Datasets::SeabornData.new("flights")
37
+ end
38
+
39
+ def test_each
40
+ records = @dataset.each.to_a
41
+ assert_equal([
42
+ 144,
43
+ {
44
+ year: 1949,
45
+ month: "Feb",
46
+ passengers: 118
47
+ },
48
+ {
49
+ year: 1960,
50
+ month: "Dec",
51
+ passengers: 432
52
+ }
53
+ ],
54
+ [
55
+ records.size,
56
+ records[1].to_h,
57
+ records[-1].to_h
58
+ ])
59
+ end
60
+ end
61
+
62
+ sub_test_case("penguins") do
63
+ def setup
64
+ @dataset = Datasets::SeabornData.new("penguins")
65
+ end
66
+
67
+ def test_each
68
+ records = @dataset.each.to_a
69
+ assert_equal([
70
+ 344,
71
+ {
72
+ species: "Adelie",
73
+ island: "Torgersen",
74
+ bill_length_mm: 39.5,
75
+ bill_depth_mm: 17.4,
76
+ flipper_length_mm: 186,
77
+ body_mass_g: 3800,
78
+ sex: "Female"
79
+ },
80
+ {
81
+ species: "Gentoo",
82
+ island: "Biscoe",
83
+ bill_length_mm: 49.9,
84
+ bill_depth_mm: 16.1,
85
+ flipper_length_mm: 213,
86
+ body_mass_g: 5400,
87
+ sex: "Male"
88
+ }
89
+ ],
90
+ [
91
+ records.size,
92
+ records[1].to_h,
93
+ records[-1].to_h
94
+ ])
95
+ end
96
+ end
97
+ end
@@ -0,0 +1,48 @@
1
+ class SudachiSynonymDictionaryTest < Test::Unit::TestCase
2
+ def setup
3
+ @dataset = Datasets::SudachiSynonymDictionary.new
4
+ end
5
+
6
+ test('#each') do
7
+ records = @dataset.each.to_a
8
+ assert_equal([
9
+ 61335,
10
+ {
11
+ group_id: "000001",
12
+ is_noun: true,
13
+ expansion_type: :always,
14
+ lexeme_id: 1,
15
+ form_type: :typical,
16
+ acronym_type: :typical,
17
+ variant_type: :typical,
18
+ categories: [],
19
+ notation: "曖昧",
20
+ },
21
+ {
22
+ group_id: "023705",
23
+ is_noun: true,
24
+ expansion_type: :always,
25
+ lexeme_id: 1,
26
+ form_type: :typical,
27
+ acronym_type: :alphabet,
28
+ variant_type: :typical,
29
+ categories: ["単位"],
30
+ notation: "GB",
31
+ },
32
+ ],
33
+ [
34
+ records.size,
35
+ records[0].to_h,
36
+ records[-1].to_h,
37
+ ])
38
+ end
39
+
40
+ sub_test_case('#metadata') do
41
+ test('#description') do
42
+ description = @dataset.metadata.description
43
+ assert do
44
+ description.start_with?('# Sudachi 同義語辞書')
45
+ end
46
+ end
47
+ end
48
+ end
data/test/test-table.rb CHANGED
@@ -3,9 +3,129 @@ class TableTest < Test::Unit::TestCase
3
3
  @table = Datasets::Iris.new.to_table
4
4
  end
5
5
 
6
- test("#[]") do
7
- assert_equal([1.4, 1.4, 1.3, 1.5, 1.4],
8
- @table[:petal_length].first(5))
6
+ test("#n_columns") do
7
+ assert_equal(5, @table.n_columns)
8
+ end
9
+
10
+ test("#n_rows") do
11
+ assert_equal(150, @table.n_rows)
12
+ end
13
+
14
+ test("#column_names") do
15
+ assert_equal([
16
+ :sepal_length,
17
+ :sepal_width,
18
+ :petal_length,
19
+ :petal_width,
20
+ :label,
21
+ ],
22
+ @table.column_names)
23
+ end
24
+
25
+ test("#each") do
26
+ shorten_hash = {}
27
+ @table.each do |name, values|
28
+ shorten_hash[name] = values.first(5)
29
+ end
30
+ assert_equal({
31
+ :label => ["Iris-setosa"] * 5,
32
+ :petal_length => [1.4, 1.4, 1.3, 1.5, 1.4],
33
+ :petal_width => [0.2, 0.2, 0.2, 0.2, 0.2],
34
+ :sepal_length => [5.1, 4.9, 4.7, 4.6, 5.0],
35
+ :sepal_width => [3.5, 3.0, 3.2, 3.1, 3.6],
36
+ },
37
+ shorten_hash)
38
+ end
39
+
40
+ test("#each_column") do
41
+ shorten_hash = {}
42
+ @table.each_column do |name, values|
43
+ shorten_hash[name] = values.first(5)
44
+ end
45
+ assert_equal({
46
+ :label => ["Iris-setosa"] * 5,
47
+ :petal_length => [1.4, 1.4, 1.3, 1.5, 1.4],
48
+ :petal_width => [0.2, 0.2, 0.2, 0.2, 0.2],
49
+ :sepal_length => [5.1, 4.9, 4.7, 4.6, 5.0],
50
+ :sepal_width => [3.5, 3.0, 3.2, 3.1, 3.6],
51
+ },
52
+ shorten_hash)
53
+ end
54
+
55
+ test("#each_record") do
56
+ records = []
57
+ @table.each_record do |record|
58
+ records << record
59
+ break if records.size == 3
60
+ end
61
+ assert_equal([
62
+ {
63
+ label: "Iris-setosa",
64
+ petal_length: 1.4,
65
+ petal_width: 0.2,
66
+ sepal_length: 5.1,
67
+ sepal_width: 3.5,
68
+ },
69
+ {
70
+ label: "Iris-setosa",
71
+ petal_length: 1.4,
72
+ petal_width: 0.2,
73
+ sepal_length: 4.9,
74
+ sepal_width: 3.0,
75
+ },
76
+ {
77
+ label: "Iris-setosa",
78
+ petal_length: 1.3,
79
+ petal_width: 0.2,
80
+ sepal_length: 4.7,
81
+ sepal_width: 3.2,
82
+ },
83
+ ],
84
+ records.collect(&:to_h))
85
+ end
86
+
87
+ sub_test_case("#find_record") do
88
+ test("positive") do
89
+ assert_equal({
90
+ label: "Iris-setosa",
91
+ petal_length: 1.4,
92
+ petal_width: 0.2,
93
+ sepal_length: 4.9,
94
+ sepal_width: 3.0,
95
+ },
96
+ @table.find_record(1).to_h)
97
+ end
98
+
99
+ test("positive - over") do
100
+ assert_nil(@table.find_record(151))
101
+ end
102
+
103
+ test("negative") do
104
+ assert_equal({
105
+ label: "Iris-virginica",
106
+ petal_length: 5.1,
107
+ petal_width: 1.8,
108
+ sepal_length: 5.9,
109
+ sepal_width: 3.0,
110
+ },
111
+ @table.find_record(-1).to_h)
112
+ end
113
+
114
+ test("negative - over") do
115
+ assert_nil(@table.find_record(-151))
116
+ end
117
+ end
118
+
119
+ sub_test_case("#[]") do
120
+ test("index") do
121
+ assert_equal([1.4, 1.4, 1.3, 1.5, 1.4],
122
+ @table[2].first(5))
123
+ end
124
+
125
+ test("name") do
126
+ assert_equal([1.4, 1.4, 1.3, 1.5, 1.4],
127
+ @table[:petal_length].first(5))
128
+ end
9
129
  end
10
130
 
11
131
  test("#dictionary_encode") do
@@ -58,21 +178,6 @@ class TableTest < Test::Unit::TestCase
58
178
  end
59
179
  end
60
180
 
61
- test("#each") do
62
- shorten_hash = {}
63
- @table.each do |name, values|
64
- shorten_hash[name] = values.first(5)
65
- end
66
- assert_equal({
67
- :label => ["Iris-setosa"] * 5,
68
- :petal_length => [1.4, 1.4, 1.3, 1.5, 1.4],
69
- :petal_width => [0.2, 0.2, 0.2, 0.2, 0.2],
70
- :sepal_length => [5.1, 4.9, 4.7, 4.6, 5.0],
71
- :sepal_width => [3.5, 3.0, 3.2, 3.1, 3.6],
72
- },
73
- shorten_hash)
74
- end
75
-
76
181
  test("#to_h") do
77
182
  shorten_hash = {}
78
183
  @table.to_h.each do |name, values|
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-datasets
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - tomisuker
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2019-03-24 00:00:00.000000000 Z
12
+ date: 2021-07-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: csv
@@ -25,6 +25,20 @@ dependencies:
25
25
  - - ">="
26
26
  - !ruby/object:Gem::Version
27
27
  version: 3.0.5
28
+ - !ruby/object:Gem::Dependency
29
+ name: rexml
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ type: :runtime
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
28
42
  - !ruby/object:Gem::Dependency
29
43
  name: rubyzip
30
44
  requirement: !ruby/object:Gem::Requirement
@@ -112,7 +126,7 @@ dependencies:
112
126
  description: 'You can use datasets easily because you can access each dataset with
113
127
  multiple ways such as `#each` and Apache Arrow Record Batch.
114
128
 
115
- '
129
+ '
116
130
  email:
117
131
  - tomisuker16@gmail.com
118
132
  - kou@clear-code.com
@@ -129,18 +143,29 @@ files:
129
143
  - lib/datasets.rb
130
144
  - lib/datasets/adult.rb
131
145
  - lib/datasets/cifar.rb
146
+ - lib/datasets/cldr-plurals.rb
147
+ - lib/datasets/communities.rb
132
148
  - lib/datasets/dataset.rb
133
149
  - lib/datasets/dictionary.rb
134
150
  - lib/datasets/downloader.rb
151
+ - lib/datasets/e-stat-japan.rb
152
+ - lib/datasets/error.rb
135
153
  - lib/datasets/fashion-mnist.rb
154
+ - lib/datasets/hepatitis.rb
136
155
  - lib/datasets/iris.rb
137
156
  - lib/datasets/libsvm-dataset-list.rb
138
157
  - lib/datasets/libsvm.rb
139
158
  - lib/datasets/metadata.rb
140
159
  - lib/datasets/mnist.rb
160
+ - lib/datasets/mushroom.rb
161
+ - lib/datasets/penguins.rb
141
162
  - lib/datasets/penn-treebank.rb
142
163
  - lib/datasets/postal-code-japan.rb
164
+ - lib/datasets/rdatasets.rb
165
+ - lib/datasets/seaborn-data.rb
166
+ - lib/datasets/sudachi-synonym-dictionary.rb
143
167
  - lib/datasets/table.rb
168
+ - lib/datasets/tar-gz-readable.rb
144
169
  - lib/datasets/version.rb
145
170
  - lib/datasets/wikipedia.rb
146
171
  - lib/datasets/wine.rb
@@ -149,14 +174,25 @@ files:
149
174
  - test/run-test.rb
150
175
  - test/test-adult.rb
151
176
  - test/test-cifar.rb
177
+ - test/test-cldr-plurals.rb
178
+ - test/test-communities.rb
179
+ - test/test-dataset.rb
152
180
  - test/test-dictionary.rb
181
+ - test/test-downloader.rb
182
+ - test/test-e-stat-japan.rb
153
183
  - test/test-fashion-mnist.rb
184
+ - test/test-hepatitis.rb
154
185
  - test/test-iris.rb
155
186
  - test/test-libsvm-dataset-list.rb
156
187
  - test/test-libsvm.rb
157
188
  - test/test-mnist.rb
189
+ - test/test-mushroom.rb
190
+ - test/test-penguins.rb
158
191
  - test/test-penn-treebank.rb
159
192
  - test/test-postal-code-japan.rb
193
+ - test/test-rdatasets.rb
194
+ - test/test-seaborn-data.rb
195
+ - test/test-sudachi-synonym-dictionary.rb
160
196
  - test/test-table.rb
161
197
  - test/test-wikipedia.rb
162
198
  - test/test-wine.rb
@@ -179,24 +215,34 @@ required_rubygems_version: !ruby/object:Gem::Requirement
179
215
  - !ruby/object:Gem::Version
180
216
  version: '0'
181
217
  requirements: []
182
- rubyforge_project:
183
- rubygems_version: 2.7.6
218
+ rubygems_version: 3.3.0.dev
184
219
  signing_key:
185
220
  specification_version: 4
186
221
  summary: Red Datasets provides classes that provide common datasets such as iris dataset.
187
222
  test_files:
188
- - test/test-iris.rb
189
- - test/test-wikipedia.rb
190
- - test/test-fashion-mnist.rb
191
- - test/test-wine.rb
192
- - test/test-postal-code-japan.rb
193
- - test/test-mnist.rb
194
223
  - test/helper.rb
195
- - test/test-adult.rb
196
- - test/test-libsvm.rb
197
224
  - test/run-test.rb
198
- - test/test-table.rb
225
+ - test/test-adult.rb
199
226
  - test/test-cifar.rb
227
+ - test/test-cldr-plurals.rb
228
+ - test/test-communities.rb
229
+ - test/test-dataset.rb
230
+ - test/test-dictionary.rb
231
+ - test/test-downloader.rb
232
+ - test/test-e-stat-japan.rb
233
+ - test/test-fashion-mnist.rb
234
+ - test/test-hepatitis.rb
235
+ - test/test-iris.rb
200
236
  - test/test-libsvm-dataset-list.rb
237
+ - test/test-libsvm.rb
238
+ - test/test-mnist.rb
239
+ - test/test-mushroom.rb
240
+ - test/test-penguins.rb
201
241
  - test/test-penn-treebank.rb
202
- - test/test-dictionary.rb
242
+ - test/test-postal-code-japan.rb
243
+ - test/test-rdatasets.rb
244
+ - test/test-seaborn-data.rb
245
+ - test/test-sudachi-synonym-dictionary.rb
246
+ - test/test-table.rb
247
+ - test/test-wikipedia.rb
248
+ - test/test-wine.rb