red-datasets 0.0.8 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,97 @@
1
+ class SeabornDataTest < Test::Unit::TestCase
2
+ sub_test_case("fmri") do
3
+ def setup
4
+ @dataset = Datasets::SeabornData.new("fmri")
5
+ end
6
+
7
+ def test_each
8
+ records = @dataset.each.to_a
9
+ assert_equal([
10
+ 1064,
11
+ {
12
+ subject: "s5",
13
+ timepoint: 14,
14
+ event: "stim",
15
+ region: "parietal",
16
+ signal: -0.0808829319505
17
+ },
18
+ {
19
+ subject: "s0",
20
+ timepoint: 0,
21
+ event: "cue",
22
+ region: "parietal",
23
+ signal: -0.00689923478092
24
+ }
25
+ ],
26
+ [
27
+ records.size,
28
+ records[1].to_h,
29
+ records[-1].to_h
30
+ ])
31
+ end
32
+ end
33
+
34
+ sub_test_case("flights") do
35
+ def setup
36
+ @dataset = Datasets::SeabornData.new("flights")
37
+ end
38
+
39
+ def test_each
40
+ records = @dataset.each.to_a
41
+ assert_equal([
42
+ 144,
43
+ {
44
+ year: 1949,
45
+ month: "Feb",
46
+ passengers: 118
47
+ },
48
+ {
49
+ year: 1960,
50
+ month: "Dec",
51
+ passengers: 432
52
+ }
53
+ ],
54
+ [
55
+ records.size,
56
+ records[1].to_h,
57
+ records[-1].to_h
58
+ ])
59
+ end
60
+ end
61
+
62
+ sub_test_case("penguins") do
63
+ def setup
64
+ @dataset = Datasets::SeabornData.new("penguins")
65
+ end
66
+
67
+ def test_each
68
+ records = @dataset.each.to_a
69
+ assert_equal([
70
+ 344,
71
+ {
72
+ species: "Adelie",
73
+ island: "Torgersen",
74
+ bill_length_mm: 39.5,
75
+ bill_depth_mm: 17.4,
76
+ flipper_length_mm: 186,
77
+ body_mass_g: 3800,
78
+ sex: "Female"
79
+ },
80
+ {
81
+ species: "Gentoo",
82
+ island: "Biscoe",
83
+ bill_length_mm: 49.9,
84
+ bill_depth_mm: 16.1,
85
+ flipper_length_mm: 213,
86
+ body_mass_g: 5400,
87
+ sex: "Male"
88
+ }
89
+ ],
90
+ [
91
+ records.size,
92
+ records[1].to_h,
93
+ records[-1].to_h
94
+ ])
95
+ end
96
+ end
97
+ end
@@ -0,0 +1,48 @@
1
+ class SudachiSynonymDictionaryTest < Test::Unit::TestCase
2
+ def setup
3
+ @dataset = Datasets::SudachiSynonymDictionary.new
4
+ end
5
+
6
+ test('#each') do
7
+ records = @dataset.each.to_a
8
+ assert_equal([
9
+ 61335,
10
+ {
11
+ group_id: "000001",
12
+ is_noun: true,
13
+ expansion_type: :always,
14
+ lexeme_id: 1,
15
+ form_type: :typical,
16
+ acronym_type: :typical,
17
+ variant_type: :typical,
18
+ categories: [],
19
+ notation: "曖昧",
20
+ },
21
+ {
22
+ group_id: "023705",
23
+ is_noun: true,
24
+ expansion_type: :always,
25
+ lexeme_id: 1,
26
+ form_type: :typical,
27
+ acronym_type: :alphabet,
28
+ variant_type: :typical,
29
+ categories: ["単位"],
30
+ notation: "GB",
31
+ },
32
+ ],
33
+ [
34
+ records.size,
35
+ records[0].to_h,
36
+ records[-1].to_h,
37
+ ])
38
+ end
39
+
40
+ sub_test_case('#metadata') do
41
+ test('#description') do
42
+ description = @dataset.metadata.description
43
+ assert do
44
+ description.start_with?('# Sudachi 同義語辞書')
45
+ end
46
+ end
47
+ end
48
+ end
data/test/test-table.rb CHANGED
@@ -3,9 +3,129 @@ class TableTest < Test::Unit::TestCase
3
3
  @table = Datasets::Iris.new.to_table
4
4
  end
5
5
 
6
- test("#[]") do
7
- assert_equal([1.4, 1.4, 1.3, 1.5, 1.4],
8
- @table[:petal_length].first(5))
6
+ test("#n_columns") do
7
+ assert_equal(5, @table.n_columns)
8
+ end
9
+
10
+ test("#n_rows") do
11
+ assert_equal(150, @table.n_rows)
12
+ end
13
+
14
+ test("#column_names") do
15
+ assert_equal([
16
+ :sepal_length,
17
+ :sepal_width,
18
+ :petal_length,
19
+ :petal_width,
20
+ :label,
21
+ ],
22
+ @table.column_names)
23
+ end
24
+
25
+ test("#each") do
26
+ shorten_hash = {}
27
+ @table.each do |name, values|
28
+ shorten_hash[name] = values.first(5)
29
+ end
30
+ assert_equal({
31
+ :label => ["Iris-setosa"] * 5,
32
+ :petal_length => [1.4, 1.4, 1.3, 1.5, 1.4],
33
+ :petal_width => [0.2, 0.2, 0.2, 0.2, 0.2],
34
+ :sepal_length => [5.1, 4.9, 4.7, 4.6, 5.0],
35
+ :sepal_width => [3.5, 3.0, 3.2, 3.1, 3.6],
36
+ },
37
+ shorten_hash)
38
+ end
39
+
40
+ test("#each_column") do
41
+ shorten_hash = {}
42
+ @table.each_column do |name, values|
43
+ shorten_hash[name] = values.first(5)
44
+ end
45
+ assert_equal({
46
+ :label => ["Iris-setosa"] * 5,
47
+ :petal_length => [1.4, 1.4, 1.3, 1.5, 1.4],
48
+ :petal_width => [0.2, 0.2, 0.2, 0.2, 0.2],
49
+ :sepal_length => [5.1, 4.9, 4.7, 4.6, 5.0],
50
+ :sepal_width => [3.5, 3.0, 3.2, 3.1, 3.6],
51
+ },
52
+ shorten_hash)
53
+ end
54
+
55
+ test("#each_record") do
56
+ records = []
57
+ @table.each_record do |record|
58
+ records << record
59
+ break if records.size == 3
60
+ end
61
+ assert_equal([
62
+ {
63
+ label: "Iris-setosa",
64
+ petal_length: 1.4,
65
+ petal_width: 0.2,
66
+ sepal_length: 5.1,
67
+ sepal_width: 3.5,
68
+ },
69
+ {
70
+ label: "Iris-setosa",
71
+ petal_length: 1.4,
72
+ petal_width: 0.2,
73
+ sepal_length: 4.9,
74
+ sepal_width: 3.0,
75
+ },
76
+ {
77
+ label: "Iris-setosa",
78
+ petal_length: 1.3,
79
+ petal_width: 0.2,
80
+ sepal_length: 4.7,
81
+ sepal_width: 3.2,
82
+ },
83
+ ],
84
+ records.collect(&:to_h))
85
+ end
86
+
87
+ sub_test_case("#find_record") do
88
+ test("positive") do
89
+ assert_equal({
90
+ label: "Iris-setosa",
91
+ petal_length: 1.4,
92
+ petal_width: 0.2,
93
+ sepal_length: 4.9,
94
+ sepal_width: 3.0,
95
+ },
96
+ @table.find_record(1).to_h)
97
+ end
98
+
99
+ test("positive - over") do
100
+ assert_nil(@table.find_record(151))
101
+ end
102
+
103
+ test("negative") do
104
+ assert_equal({
105
+ label: "Iris-virginica",
106
+ petal_length: 5.1,
107
+ petal_width: 1.8,
108
+ sepal_length: 5.9,
109
+ sepal_width: 3.0,
110
+ },
111
+ @table.find_record(-1).to_h)
112
+ end
113
+
114
+ test("negative - over") do
115
+ assert_nil(@table.find_record(-151))
116
+ end
117
+ end
118
+
119
+ sub_test_case("#[]") do
120
+ test("index") do
121
+ assert_equal([1.4, 1.4, 1.3, 1.5, 1.4],
122
+ @table[2].first(5))
123
+ end
124
+
125
+ test("name") do
126
+ assert_equal([1.4, 1.4, 1.3, 1.5, 1.4],
127
+ @table[:petal_length].first(5))
128
+ end
9
129
  end
10
130
 
11
131
  test("#dictionary_encode") do
@@ -58,21 +178,6 @@ class TableTest < Test::Unit::TestCase
58
178
  end
59
179
  end
60
180
 
61
- test("#each") do
62
- shorten_hash = {}
63
- @table.each do |name, values|
64
- shorten_hash[name] = values.first(5)
65
- end
66
- assert_equal({
67
- :label => ["Iris-setosa"] * 5,
68
- :petal_length => [1.4, 1.4, 1.3, 1.5, 1.4],
69
- :petal_width => [0.2, 0.2, 0.2, 0.2, 0.2],
70
- :sepal_length => [5.1, 4.9, 4.7, 4.6, 5.0],
71
- :sepal_width => [3.5, 3.0, 3.2, 3.1, 3.6],
72
- },
73
- shorten_hash)
74
- end
75
-
76
181
  test("#to_h") do
77
182
  shorten_hash = {}
78
183
  @table.to_h.each do |name, values|
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-datasets
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - tomisuker
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2019-03-24 00:00:00.000000000 Z
12
+ date: 2021-07-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: csv
@@ -25,6 +25,20 @@ dependencies:
25
25
  - - ">="
26
26
  - !ruby/object:Gem::Version
27
27
  version: 3.0.5
28
+ - !ruby/object:Gem::Dependency
29
+ name: rexml
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ type: :runtime
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
28
42
  - !ruby/object:Gem::Dependency
29
43
  name: rubyzip
30
44
  requirement: !ruby/object:Gem::Requirement
@@ -112,7 +126,7 @@ dependencies:
112
126
  description: 'You can use datasets easily because you can access each dataset with
113
127
  multiple ways such as `#each` and Apache Arrow Record Batch.
114
128
 
115
- '
129
+ '
116
130
  email:
117
131
  - tomisuker16@gmail.com
118
132
  - kou@clear-code.com
@@ -129,18 +143,29 @@ files:
129
143
  - lib/datasets.rb
130
144
  - lib/datasets/adult.rb
131
145
  - lib/datasets/cifar.rb
146
+ - lib/datasets/cldr-plurals.rb
147
+ - lib/datasets/communities.rb
132
148
  - lib/datasets/dataset.rb
133
149
  - lib/datasets/dictionary.rb
134
150
  - lib/datasets/downloader.rb
151
+ - lib/datasets/e-stat-japan.rb
152
+ - lib/datasets/error.rb
135
153
  - lib/datasets/fashion-mnist.rb
154
+ - lib/datasets/hepatitis.rb
136
155
  - lib/datasets/iris.rb
137
156
  - lib/datasets/libsvm-dataset-list.rb
138
157
  - lib/datasets/libsvm.rb
139
158
  - lib/datasets/metadata.rb
140
159
  - lib/datasets/mnist.rb
160
+ - lib/datasets/mushroom.rb
161
+ - lib/datasets/penguins.rb
141
162
  - lib/datasets/penn-treebank.rb
142
163
  - lib/datasets/postal-code-japan.rb
164
+ - lib/datasets/rdatasets.rb
165
+ - lib/datasets/seaborn-data.rb
166
+ - lib/datasets/sudachi-synonym-dictionary.rb
143
167
  - lib/datasets/table.rb
168
+ - lib/datasets/tar-gz-readable.rb
144
169
  - lib/datasets/version.rb
145
170
  - lib/datasets/wikipedia.rb
146
171
  - lib/datasets/wine.rb
@@ -149,14 +174,25 @@ files:
149
174
  - test/run-test.rb
150
175
  - test/test-adult.rb
151
176
  - test/test-cifar.rb
177
+ - test/test-cldr-plurals.rb
178
+ - test/test-communities.rb
179
+ - test/test-dataset.rb
152
180
  - test/test-dictionary.rb
181
+ - test/test-downloader.rb
182
+ - test/test-e-stat-japan.rb
153
183
  - test/test-fashion-mnist.rb
184
+ - test/test-hepatitis.rb
154
185
  - test/test-iris.rb
155
186
  - test/test-libsvm-dataset-list.rb
156
187
  - test/test-libsvm.rb
157
188
  - test/test-mnist.rb
189
+ - test/test-mushroom.rb
190
+ - test/test-penguins.rb
158
191
  - test/test-penn-treebank.rb
159
192
  - test/test-postal-code-japan.rb
193
+ - test/test-rdatasets.rb
194
+ - test/test-seaborn-data.rb
195
+ - test/test-sudachi-synonym-dictionary.rb
160
196
  - test/test-table.rb
161
197
  - test/test-wikipedia.rb
162
198
  - test/test-wine.rb
@@ -179,24 +215,34 @@ required_rubygems_version: !ruby/object:Gem::Requirement
179
215
  - !ruby/object:Gem::Version
180
216
  version: '0'
181
217
  requirements: []
182
- rubyforge_project:
183
- rubygems_version: 2.7.6
218
+ rubygems_version: 3.3.0.dev
184
219
  signing_key:
185
220
  specification_version: 4
186
221
  summary: Red Datasets provides classes that provide common datasets such as iris dataset.
187
222
  test_files:
188
- - test/test-iris.rb
189
- - test/test-wikipedia.rb
190
- - test/test-fashion-mnist.rb
191
- - test/test-wine.rb
192
- - test/test-postal-code-japan.rb
193
- - test/test-mnist.rb
194
223
  - test/helper.rb
195
- - test/test-adult.rb
196
- - test/test-libsvm.rb
197
224
  - test/run-test.rb
198
- - test/test-table.rb
225
+ - test/test-adult.rb
199
226
  - test/test-cifar.rb
227
+ - test/test-cldr-plurals.rb
228
+ - test/test-communities.rb
229
+ - test/test-dataset.rb
230
+ - test/test-dictionary.rb
231
+ - test/test-downloader.rb
232
+ - test/test-e-stat-japan.rb
233
+ - test/test-fashion-mnist.rb
234
+ - test/test-hepatitis.rb
235
+ - test/test-iris.rb
200
236
  - test/test-libsvm-dataset-list.rb
237
+ - test/test-libsvm.rb
238
+ - test/test-mnist.rb
239
+ - test/test-mushroom.rb
240
+ - test/test-penguins.rb
201
241
  - test/test-penn-treebank.rb
202
- - test/test-dictionary.rb
242
+ - test/test-postal-code-japan.rb
243
+ - test/test-rdatasets.rb
244
+ - test/test-seaborn-data.rb
245
+ - test/test-sudachi-synonym-dictionary.rb
246
+ - test/test-table.rb
247
+ - test/test-wikipedia.rb
248
+ - test/test-wine.rb