red-datasets 0.0.6 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +23 -7
- data/doc/text/news.md +124 -0
- data/lib/datasets.rb +18 -6
- data/lib/datasets/adult.rb +84 -0
- data/lib/datasets/cldr-plurals.rb +385 -0
- data/lib/datasets/communities.rb +198 -0
- data/lib/datasets/dataset.rb +13 -0
- data/lib/datasets/dictionary.rb +59 -0
- data/lib/datasets/downloader.rb +37 -62
- data/lib/datasets/e-stat-japan.rb +320 -0
- data/lib/datasets/error.rb +4 -0
- data/lib/datasets/fashion-mnist.rb +12 -0
- data/lib/datasets/hepatitis.rb +207 -0
- data/lib/datasets/iris.rb +1 -1
- data/lib/datasets/libsvm-dataset-list.rb +277 -0
- data/lib/datasets/libsvm.rb +135 -0
- data/lib/datasets/mnist.rb +11 -8
- data/lib/datasets/mushroom.rb +256 -0
- data/lib/datasets/penguins.rb +125 -0
- data/lib/datasets/penn-treebank.rb +2 -9
- data/lib/datasets/postal-code-japan.rb +154 -0
- data/lib/datasets/table.rb +99 -3
- data/lib/datasets/version.rb +1 -1
- data/lib/datasets/wikipedia.rb +2 -10
- data/lib/datasets/wine.rb +64 -0
- data/red-datasets.gemspec +4 -0
- data/test/helper.rb +1 -0
- data/test/run-test.rb +2 -0
- data/test/test-adult.rb +126 -0
- data/test/test-cldr-plurals.rb +180 -0
- data/test/test-communities.rb +290 -0
- data/test/test-dictionary.rb +43 -0
- data/test/test-e-stat-japan.rb +383 -0
- data/test/test-fashion-mnist.rb +137 -0
- data/test/test-hepatitis.rb +74 -0
- data/test/test-libsvm-dataset-list.rb +47 -0
- data/test/test-libsvm.rb +205 -0
- data/test/test-mnist.rb +95 -70
- data/test/test-mushroom.rb +80 -0
- data/test/test-penguins.rb +239 -0
- data/test/test-penn-treebank.rb +6 -6
- data/test/test-postal-code-japan.rb +69 -0
- data/test/test-table.rb +144 -19
- data/test/test-wine.rb +58 -0
- metadata +89 -8
@@ -0,0 +1,80 @@
|
|
1
|
+
class MushroomTest < Test::Unit::TestCase
|
2
|
+
def setup
|
3
|
+
@dataset = Datasets::Mushroom.new
|
4
|
+
end
|
5
|
+
|
6
|
+
def record(*args)
|
7
|
+
Datasets::Mushroom::Record.new(*args)
|
8
|
+
end
|
9
|
+
|
10
|
+
test("#each") do
|
11
|
+
records = @dataset.each.to_a
|
12
|
+
assert_equal([
|
13
|
+
8124,
|
14
|
+
{
|
15
|
+
:label => "poisonous",
|
16
|
+
:cap_shape => "convex",
|
17
|
+
:cap_surface => "smooth",
|
18
|
+
:cap_color => "brown",
|
19
|
+
:bruises => "bruises",
|
20
|
+
:odor => "pungent",
|
21
|
+
:gill_attachment => "free",
|
22
|
+
:gill_spacing => "close",
|
23
|
+
:gill_size => "narrow",
|
24
|
+
:gill_color => "black",
|
25
|
+
:stalk_shape => "enlarging",
|
26
|
+
:stalk_root => "equal",
|
27
|
+
:stalk_surface_above_ring => "smooth",
|
28
|
+
:stalk_surface_below_ring => "smooth",
|
29
|
+
:stalk_color_above_ring => "white",
|
30
|
+
:stalk_color_below_ring => "white",
|
31
|
+
:veil_type => "partial",
|
32
|
+
:veil_color => "white",
|
33
|
+
:n_rings => 1,
|
34
|
+
:ring_type => "pendant",
|
35
|
+
:spore_print_color => "black",
|
36
|
+
:population => "scattered",
|
37
|
+
:habitat => "urban"
|
38
|
+
},
|
39
|
+
{
|
40
|
+
:label => "edible",
|
41
|
+
:cap_shape => "convex",
|
42
|
+
:cap_surface => "smooth",
|
43
|
+
:cap_color => "brown",
|
44
|
+
:bruises => "no",
|
45
|
+
:odor => "none",
|
46
|
+
:gill_attachment => "attached",
|
47
|
+
:gill_spacing => "close",
|
48
|
+
:gill_size => "broad",
|
49
|
+
:gill_color => "yellow",
|
50
|
+
:stalk_shape => "enlarging",
|
51
|
+
:stalk_root => "missing",
|
52
|
+
:stalk_surface_above_ring => "smooth",
|
53
|
+
:stalk_surface_below_ring => "smooth",
|
54
|
+
:stalk_color_above_ring => "orange",
|
55
|
+
:stalk_color_below_ring => "orange",
|
56
|
+
:veil_type => "partial",
|
57
|
+
:veil_color => "orange",
|
58
|
+
:n_rings => 1,
|
59
|
+
:ring_type => "pendant",
|
60
|
+
:spore_print_color => "orange",
|
61
|
+
:population => "clustered",
|
62
|
+
:habitat => "leaves"
|
63
|
+
}
|
64
|
+
],
|
65
|
+
[
|
66
|
+
records.size,
|
67
|
+
records[0].to_h,
|
68
|
+
records[-1].to_h
|
69
|
+
])
|
70
|
+
end
|
71
|
+
|
72
|
+
sub_test_case("#metadata") do
|
73
|
+
test("#description") do
|
74
|
+
description = @dataset.metadata.description
|
75
|
+
assert do
|
76
|
+
description.start_with?("1. Title: Mushroom Database")
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
@@ -0,0 +1,239 @@
|
|
1
|
+
class PenguinsTest < Test::Unit::TestCase
|
2
|
+
sub_test_case("PenguinsRawData::SpeciesBase") do
|
3
|
+
test("#data_path") do
|
4
|
+
data_paths = [ Datasets::PenguinsRawData::Adelie,
|
5
|
+
Datasets::PenguinsRawData::Gentoo,
|
6
|
+
Datasets::PenguinsRawData::Chinstrap ].map {|cls|
|
7
|
+
dataset = cls.new
|
8
|
+
dataset.data_path.relative_path_from(dataset.send(:cache_dir_path)).to_s
|
9
|
+
}
|
10
|
+
assert_equal(["penguins/adelie.csv", "penguins/gentoo.csv", "penguins/chinstrap.csv"],
|
11
|
+
data_paths)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
sub_test_case("Adelie") do
|
16
|
+
def setup
|
17
|
+
@dataset = Datasets::PenguinsRawData::Adelie.new
|
18
|
+
end
|
19
|
+
|
20
|
+
test("#each") do
|
21
|
+
records = @dataset.each.to_a
|
22
|
+
assert_equal([ 152,
|
23
|
+
{
|
24
|
+
study_name: "PAL0708",
|
25
|
+
sample_number: 1,
|
26
|
+
species: "Adelie Penguin (Pygoscelis adeliae)",
|
27
|
+
region: "Anvers",
|
28
|
+
island: "Torgersen",
|
29
|
+
stage: "Adult, 1 Egg Stage",
|
30
|
+
individual_id: "N1A1",
|
31
|
+
clutch_completion: "Yes",
|
32
|
+
date_egg: DateTime.new(2007, 11, 11),
|
33
|
+
culmen_length_mm: 39.1,
|
34
|
+
culmen_depth_mm: 18.7,
|
35
|
+
flipper_length_mm: 181,
|
36
|
+
body_mass_g: 3750,
|
37
|
+
sex: "MALE",
|
38
|
+
delta_15_n_permil: nil,
|
39
|
+
delta_13_c_permil: nil,
|
40
|
+
comments: "Not enough blood for isotopes."
|
41
|
+
},
|
42
|
+
{
|
43
|
+
study_name: "PAL0910",
|
44
|
+
sample_number: 152,
|
45
|
+
species: "Adelie Penguin (Pygoscelis adeliae)",
|
46
|
+
region: "Anvers",
|
47
|
+
island: "Dream",
|
48
|
+
stage: "Adult, 1 Egg Stage",
|
49
|
+
individual_id: "N85A2",
|
50
|
+
clutch_completion: "Yes",
|
51
|
+
date_egg: DateTime.new(2009, 11, 17),
|
52
|
+
culmen_length_mm: 41.5,
|
53
|
+
culmen_depth_mm: 18.5,
|
54
|
+
flipper_length_mm: 201,
|
55
|
+
body_mass_g: 4000,
|
56
|
+
sex: "MALE",
|
57
|
+
delta_15_n_permil: 8.89640,
|
58
|
+
delta_13_c_permil: -26.06967,
|
59
|
+
comments: nil
|
60
|
+
}
|
61
|
+
],
|
62
|
+
[
|
63
|
+
records.size,
|
64
|
+
records[0].to_h,
|
65
|
+
records[-1].to_h
|
66
|
+
])
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
sub_test_case("Gentoo") do
|
71
|
+
def setup
|
72
|
+
@dataset = Datasets::PenguinsRawData::Gentoo.new
|
73
|
+
end
|
74
|
+
|
75
|
+
test("#each") do
|
76
|
+
records = @dataset.each.to_a
|
77
|
+
assert_equal([ 124,
|
78
|
+
{
|
79
|
+
study_name: "PAL0708",
|
80
|
+
sample_number: 1,
|
81
|
+
species: "Gentoo penguin (Pygoscelis papua)",
|
82
|
+
region: "Anvers",
|
83
|
+
island: "Biscoe",
|
84
|
+
stage: "Adult, 1 Egg Stage",
|
85
|
+
individual_id: "N31A1",
|
86
|
+
clutch_completion: "Yes",
|
87
|
+
date_egg: DateTime.new(2007, 11, 27),
|
88
|
+
culmen_length_mm: 46.1,
|
89
|
+
culmen_depth_mm: 13.2,
|
90
|
+
flipper_length_mm: 211,
|
91
|
+
body_mass_g: 4500,
|
92
|
+
sex: "FEMALE",
|
93
|
+
delta_15_n_permil: 7.993,
|
94
|
+
delta_13_c_permil: -25.5139,
|
95
|
+
comments: nil
|
96
|
+
},
|
97
|
+
{
|
98
|
+
study_name: "PAL0910",
|
99
|
+
sample_number: 124,
|
100
|
+
species: "Gentoo penguin (Pygoscelis papua)",
|
101
|
+
region: "Anvers",
|
102
|
+
island: "Biscoe",
|
103
|
+
stage: "Adult, 1 Egg Stage",
|
104
|
+
individual_id: "N43A2",
|
105
|
+
clutch_completion: "Yes",
|
106
|
+
date_egg: DateTime.new(2009, 11, 22),
|
107
|
+
culmen_length_mm: 49.9,
|
108
|
+
culmen_depth_mm: 16.1,
|
109
|
+
flipper_length_mm: 213,
|
110
|
+
body_mass_g: 5400,
|
111
|
+
sex: "MALE",
|
112
|
+
delta_15_n_permil: 8.3639,
|
113
|
+
delta_13_c_permil: -26.15531,
|
114
|
+
comments: nil
|
115
|
+
}
|
116
|
+
],
|
117
|
+
[
|
118
|
+
records.size,
|
119
|
+
records[0].to_h,
|
120
|
+
records[-1].to_h
|
121
|
+
])
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
125
|
+
sub_test_case("Chinstrap") do
|
126
|
+
def setup
|
127
|
+
@dataset = Datasets::PenguinsRawData::Chinstrap.new
|
128
|
+
end
|
129
|
+
|
130
|
+
test("#each") do
|
131
|
+
records = @dataset.each.to_a
|
132
|
+
assert_equal([ 68,
|
133
|
+
{
|
134
|
+
study_name: "PAL0708",
|
135
|
+
sample_number: 1,
|
136
|
+
species: "Chinstrap penguin (Pygoscelis antarctica)",
|
137
|
+
region: "Anvers",
|
138
|
+
island: "Dream",
|
139
|
+
stage: "Adult, 1 Egg Stage",
|
140
|
+
individual_id: "N61A1",
|
141
|
+
clutch_completion: "No",
|
142
|
+
date_egg: DateTime.new(2007, 11, 19),
|
143
|
+
culmen_length_mm: 46.5,
|
144
|
+
culmen_depth_mm: 17.9,
|
145
|
+
flipper_length_mm: 192,
|
146
|
+
body_mass_g: 3500,
|
147
|
+
sex: "FEMALE",
|
148
|
+
delta_15_n_permil: 9.03935,
|
149
|
+
delta_13_c_permil: -24.30229,
|
150
|
+
comments: "Nest never observed with full clutch."
|
151
|
+
},
|
152
|
+
{
|
153
|
+
study_name: "PAL0910",
|
154
|
+
sample_number: 68,
|
155
|
+
species: "Chinstrap penguin (Pygoscelis antarctica)",
|
156
|
+
region: "Anvers",
|
157
|
+
island: "Dream",
|
158
|
+
stage: "Adult, 1 Egg Stage",
|
159
|
+
individual_id: "N100A2",
|
160
|
+
clutch_completion: "Yes",
|
161
|
+
date_egg: DateTime.new(2009, 11, 21),
|
162
|
+
culmen_length_mm: 50.2,
|
163
|
+
culmen_depth_mm: 18.7,
|
164
|
+
flipper_length_mm: 198,
|
165
|
+
body_mass_g: 3775,
|
166
|
+
sex: "FEMALE",
|
167
|
+
delta_15_n_permil: 9.39305,
|
168
|
+
delta_13_c_permil: -24.25255,
|
169
|
+
comments: nil
|
170
|
+
}
|
171
|
+
],
|
172
|
+
[
|
173
|
+
records.size,
|
174
|
+
records[0].to_h,
|
175
|
+
records[-1].to_h
|
176
|
+
])
|
177
|
+
end
|
178
|
+
end
|
179
|
+
|
180
|
+
sub_test_case("Penguins") do
|
181
|
+
def setup
|
182
|
+
@dataset = Datasets::Penguins.new
|
183
|
+
end
|
184
|
+
|
185
|
+
test("#each") do
|
186
|
+
records = @dataset.each.to_a
|
187
|
+
assert_equal([
|
188
|
+
344,
|
189
|
+
{
|
190
|
+
species: "Adelie",
|
191
|
+
island: "Torgersen",
|
192
|
+
bill_length_mm: 39.1,
|
193
|
+
bill_depth_mm: 18.7,
|
194
|
+
flipper_length_mm: 181,
|
195
|
+
body_mass_g: 3750,
|
196
|
+
sex: "male",
|
197
|
+
year: 2007
|
198
|
+
},
|
199
|
+
{
|
200
|
+
species: "Gentoo",
|
201
|
+
island: "Biscoe",
|
202
|
+
bill_length_mm: 46.1,
|
203
|
+
bill_depth_mm: 13.2,
|
204
|
+
flipper_length_mm: 211,
|
205
|
+
body_mass_g: 4500,
|
206
|
+
sex: "female",
|
207
|
+
year: 2007
|
208
|
+
},
|
209
|
+
{
|
210
|
+
species: "Chinstrap",
|
211
|
+
island: "Dream",
|
212
|
+
bill_length_mm: 46.5,
|
213
|
+
bill_depth_mm: 17.9,
|
214
|
+
flipper_length_mm: 192,
|
215
|
+
body_mass_g: 3500,
|
216
|
+
sex: "female",
|
217
|
+
year: 2007
|
218
|
+
},
|
219
|
+
{
|
220
|
+
species: "Chinstrap",
|
221
|
+
island: "Dream",
|
222
|
+
bill_length_mm: 50.2,
|
223
|
+
bill_depth_mm: 18.7,
|
224
|
+
flipper_length_mm: 198,
|
225
|
+
body_mass_g: 3775,
|
226
|
+
sex: "female",
|
227
|
+
year: 2009
|
228
|
+
}
|
229
|
+
],
|
230
|
+
[
|
231
|
+
records.size,
|
232
|
+
records[0].to_h,
|
233
|
+
records[152].to_h,
|
234
|
+
records[276].to_h,
|
235
|
+
records[-1].to_h,
|
236
|
+
])
|
237
|
+
end
|
238
|
+
end
|
239
|
+
end
|
data/test/test-penn-treebank.rb
CHANGED
@@ -9,8 +9,8 @@ class PennTreebankTest < Test::Unit::TestCase
|
|
9
9
|
records = dataset.to_a
|
10
10
|
assert_equal([
|
11
11
|
887521,
|
12
|
-
record("aer"
|
13
|
-
record("<unk>"
|
12
|
+
record("aer"),
|
13
|
+
record("<unk>"),
|
14
14
|
],
|
15
15
|
[
|
16
16
|
records.size,
|
@@ -24,8 +24,8 @@ class PennTreebankTest < Test::Unit::TestCase
|
|
24
24
|
records = dataset.to_a
|
25
25
|
assert_equal([
|
26
26
|
78669,
|
27
|
-
record("no"
|
28
|
-
record("us"
|
27
|
+
record("no"),
|
28
|
+
record("us"),
|
29
29
|
],
|
30
30
|
[
|
31
31
|
records.size,
|
@@ -39,8 +39,8 @@ class PennTreebankTest < Test::Unit::TestCase
|
|
39
39
|
records = dataset.to_a
|
40
40
|
assert_equal([
|
41
41
|
70390,
|
42
|
-
record("consumers"
|
43
|
-
record("N"
|
42
|
+
record("consumers"),
|
43
|
+
record("N"),
|
44
44
|
],
|
45
45
|
[
|
46
46
|
records.size,
|
@@ -0,0 +1,69 @@
|
|
1
|
+
class PostalCodeJapanTest < Test::Unit::TestCase
|
2
|
+
sub_test_case(":reading") do
|
3
|
+
test(":lowercase") do
|
4
|
+
dataset = Datasets::PostalCodeJapan.new(reading: :lowercase)
|
5
|
+
assert_equal({
|
6
|
+
organization_code: "01101",
|
7
|
+
old_postal_code: "060",
|
8
|
+
postal_code: "0600000",
|
9
|
+
prefecture_reading: "ホッカイドウ",
|
10
|
+
city_reading: "サッポロシチュウオウク",
|
11
|
+
address_reading: "イカニケイサイガナイバアイ",
|
12
|
+
prefecture: "北海道",
|
13
|
+
city: "札幌市中央区",
|
14
|
+
address: "以下に掲載がない場合",
|
15
|
+
have_multiple_postal_codes: false,
|
16
|
+
have_address_number_per_koaza: false,
|
17
|
+
have_chome: false,
|
18
|
+
postal_code_is_shared: false,
|
19
|
+
changed: false,
|
20
|
+
change_reason: nil,
|
21
|
+
},
|
22
|
+
dataset.first.to_h)
|
23
|
+
end
|
24
|
+
|
25
|
+
test(":uppercase") do
|
26
|
+
dataset = Datasets::PostalCodeJapan.new(reading: :uppercase)
|
27
|
+
assert_equal({
|
28
|
+
organization_code: "01101",
|
29
|
+
old_postal_code: "060",
|
30
|
+
postal_code: "0600000",
|
31
|
+
prefecture_reading: "ホツカイドウ",
|
32
|
+
city_reading: "サツポロシチユウオウク",
|
33
|
+
address_reading: "イカニケイサイガナイバアイ",
|
34
|
+
prefecture: "北海道",
|
35
|
+
city: "札幌市中央区",
|
36
|
+
address: "以下に掲載がない場合",
|
37
|
+
have_multiple_postal_codes: false,
|
38
|
+
have_address_number_per_koaza: false,
|
39
|
+
have_chome: false,
|
40
|
+
postal_code_is_shared: false,
|
41
|
+
changed: false,
|
42
|
+
change_reason: nil,
|
43
|
+
},
|
44
|
+
dataset.first.to_h)
|
45
|
+
end
|
46
|
+
|
47
|
+
test(":romaji") do
|
48
|
+
dataset = Datasets::PostalCodeJapan.new(reading: :romaji)
|
49
|
+
assert_equal({
|
50
|
+
organization_code: nil,
|
51
|
+
old_postal_code: nil,
|
52
|
+
postal_code: "0600000",
|
53
|
+
prefecture_reading: "HOKKAIDO",
|
54
|
+
city_reading: "SAPPORO SHI CHUO KU",
|
55
|
+
address_reading: "IKANIKEISAIGANAIBAAI",
|
56
|
+
prefecture: "北海道",
|
57
|
+
city: "札幌市 中央区",
|
58
|
+
address: "以下に掲載がない場合",
|
59
|
+
have_multiple_postal_codes: false,
|
60
|
+
have_address_number_per_koaza: false,
|
61
|
+
have_chome: false,
|
62
|
+
postal_code_is_shared: false,
|
63
|
+
changed: false,
|
64
|
+
change_reason: nil,
|
65
|
+
},
|
66
|
+
dataset.first.to_h)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
data/test/test-table.rb
CHANGED
@@ -3,9 +3,149 @@ class TableTest < Test::Unit::TestCase
|
|
3
3
|
@table = Datasets::Iris.new.to_table
|
4
4
|
end
|
5
5
|
|
6
|
-
test("#
|
7
|
-
assert_equal(
|
8
|
-
|
6
|
+
test("#n_columns") do
|
7
|
+
assert_equal(5, @table.n_columns)
|
8
|
+
end
|
9
|
+
|
10
|
+
test("#n_rows") do
|
11
|
+
assert_equal(150, @table.n_rows)
|
12
|
+
end
|
13
|
+
|
14
|
+
test("#column_names") do
|
15
|
+
assert_equal([
|
16
|
+
:sepal_length,
|
17
|
+
:sepal_width,
|
18
|
+
:petal_length,
|
19
|
+
:petal_width,
|
20
|
+
:label,
|
21
|
+
],
|
22
|
+
@table.column_names)
|
23
|
+
end
|
24
|
+
|
25
|
+
test("#each") do
|
26
|
+
shorten_hash = {}
|
27
|
+
@table.each do |name, values|
|
28
|
+
shorten_hash[name] = values.first(5)
|
29
|
+
end
|
30
|
+
assert_equal({
|
31
|
+
:label => ["Iris-setosa"] * 5,
|
32
|
+
:petal_length => [1.4, 1.4, 1.3, 1.5, 1.4],
|
33
|
+
:petal_width => [0.2, 0.2, 0.2, 0.2, 0.2],
|
34
|
+
:sepal_length => [5.1, 4.9, 4.7, 4.6, 5.0],
|
35
|
+
:sepal_width => [3.5, 3.0, 3.2, 3.1, 3.6],
|
36
|
+
},
|
37
|
+
shorten_hash)
|
38
|
+
end
|
39
|
+
|
40
|
+
test("#each_column") do
|
41
|
+
shorten_hash = {}
|
42
|
+
@table.each_column do |name, values|
|
43
|
+
shorten_hash[name] = values.first(5)
|
44
|
+
end
|
45
|
+
assert_equal({
|
46
|
+
:label => ["Iris-setosa"] * 5,
|
47
|
+
:petal_length => [1.4, 1.4, 1.3, 1.5, 1.4],
|
48
|
+
:petal_width => [0.2, 0.2, 0.2, 0.2, 0.2],
|
49
|
+
:sepal_length => [5.1, 4.9, 4.7, 4.6, 5.0],
|
50
|
+
:sepal_width => [3.5, 3.0, 3.2, 3.1, 3.6],
|
51
|
+
},
|
52
|
+
shorten_hash)
|
53
|
+
end
|
54
|
+
|
55
|
+
test("#each_record") do
|
56
|
+
records = []
|
57
|
+
@table.each_record do |record|
|
58
|
+
records << record
|
59
|
+
break if records.size == 3
|
60
|
+
end
|
61
|
+
assert_equal([
|
62
|
+
{
|
63
|
+
label: "Iris-setosa",
|
64
|
+
petal_length: 1.4,
|
65
|
+
petal_width: 0.2,
|
66
|
+
sepal_length: 5.1,
|
67
|
+
sepal_width: 3.5,
|
68
|
+
},
|
69
|
+
{
|
70
|
+
label: "Iris-setosa",
|
71
|
+
petal_length: 1.4,
|
72
|
+
petal_width: 0.2,
|
73
|
+
sepal_length: 4.9,
|
74
|
+
sepal_width: 3.0,
|
75
|
+
},
|
76
|
+
{
|
77
|
+
label: "Iris-setosa",
|
78
|
+
petal_length: 1.3,
|
79
|
+
petal_width: 0.2,
|
80
|
+
sepal_length: 4.7,
|
81
|
+
sepal_width: 3.2,
|
82
|
+
},
|
83
|
+
],
|
84
|
+
records.collect(&:to_h))
|
85
|
+
end
|
86
|
+
|
87
|
+
sub_test_case("#find_record") do
|
88
|
+
test("positive") do
|
89
|
+
assert_equal({
|
90
|
+
label: "Iris-setosa",
|
91
|
+
petal_length: 1.4,
|
92
|
+
petal_width: 0.2,
|
93
|
+
sepal_length: 4.9,
|
94
|
+
sepal_width: 3.0,
|
95
|
+
},
|
96
|
+
@table.find_record(1).to_h)
|
97
|
+
end
|
98
|
+
|
99
|
+
test("positive - over") do
|
100
|
+
assert_nil(@table.find_record(151))
|
101
|
+
end
|
102
|
+
|
103
|
+
test("negative") do
|
104
|
+
assert_equal({
|
105
|
+
label: "Iris-virginica",
|
106
|
+
petal_length: 5.1,
|
107
|
+
petal_width: 1.8,
|
108
|
+
sepal_length: 5.9,
|
109
|
+
sepal_width: 3.0,
|
110
|
+
},
|
111
|
+
@table.find_record(-1).to_h)
|
112
|
+
end
|
113
|
+
|
114
|
+
test("negative - over") do
|
115
|
+
assert_nil(@table.find_record(-151))
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
sub_test_case("#[]") do
|
120
|
+
test("index") do
|
121
|
+
assert_equal([1.4, 1.4, 1.3, 1.5, 1.4],
|
122
|
+
@table[2].first(5))
|
123
|
+
end
|
124
|
+
|
125
|
+
test("name") do
|
126
|
+
assert_equal([1.4, 1.4, 1.3, 1.5, 1.4],
|
127
|
+
@table[:petal_length].first(5))
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
test("#dictionary_encode") do
|
132
|
+
assert_equal([
|
133
|
+
[0, "Iris-setosa"],
|
134
|
+
[1, "Iris-versicolor"],
|
135
|
+
[2, "Iris-virginica"],
|
136
|
+
],
|
137
|
+
@table.dictionary_encode(:label).to_a)
|
138
|
+
end
|
139
|
+
|
140
|
+
test("#label_encode") do
|
141
|
+
label_encoded_labels = @table.label_encode(:label)
|
142
|
+
labels = @table[:label]
|
143
|
+
assert_equal([0, 1, 2],
|
144
|
+
[
|
145
|
+
label_encoded_labels[labels.find_index("Iris-setosa")],
|
146
|
+
label_encoded_labels[labels.find_index("Iris-versicolor")],
|
147
|
+
label_encoded_labels[labels.find_index("Iris-virginica")],
|
148
|
+
])
|
9
149
|
end
|
10
150
|
|
11
151
|
sub_test_case("#fetch_values") do
|
@@ -38,28 +178,13 @@ class TableTest < Test::Unit::TestCase
|
|
38
178
|
end
|
39
179
|
end
|
40
180
|
|
41
|
-
test("#each") do
|
42
|
-
shorten_hash = {}
|
43
|
-
@table.each do |name, values|
|
44
|
-
shorten_hash[name] = values.first(5)
|
45
|
-
end
|
46
|
-
assert_equal({
|
47
|
-
:class => ["Iris-setosa"] * 5,
|
48
|
-
:petal_length => [1.4, 1.4, 1.3, 1.5, 1.4],
|
49
|
-
:petal_width => [0.2, 0.2, 0.2, 0.2, 0.2],
|
50
|
-
:sepal_length => [5.1, 4.9, 4.7, 4.6, 5.0],
|
51
|
-
:sepal_width => [3.5, 3.0, 3.2, 3.1, 3.6],
|
52
|
-
},
|
53
|
-
shorten_hash)
|
54
|
-
end
|
55
|
-
|
56
181
|
test("#to_h") do
|
57
182
|
shorten_hash = {}
|
58
183
|
@table.to_h.each do |name, values|
|
59
184
|
shorten_hash[name] = values.first(5)
|
60
185
|
end
|
61
186
|
assert_equal({
|
62
|
-
:
|
187
|
+
:label => ["Iris-setosa"] * 5,
|
63
188
|
:petal_length => [1.4, 1.4, 1.3, 1.5, 1.4],
|
64
189
|
:petal_width => [0.2, 0.2, 0.2, 0.2, 0.2],
|
65
190
|
:sepal_length => [5.1, 4.9, 4.7, 4.6, 5.0],
|