red-datasets 0.0.7 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +20 -4
- data/doc/text/news.md +102 -0
- data/lib/datasets.rb +19 -9
- data/lib/datasets/adult.rb +4 -3
- data/lib/datasets/cifar.rb +4 -12
- data/lib/datasets/cldr-plurals.rb +385 -0
- data/lib/datasets/communities.rb +198 -0
- data/lib/datasets/dataset.rb +20 -1
- data/lib/datasets/downloader.rb +54 -26
- data/lib/datasets/e-stat-japan.rb +320 -0
- data/lib/datasets/error.rb +4 -0
- data/lib/datasets/hepatitis.rb +207 -0
- data/lib/datasets/libsvm-dataset-list.rb +277 -0
- data/lib/datasets/libsvm.rb +135 -0
- data/lib/datasets/mnist.rb +0 -2
- data/lib/datasets/mushroom.rb +256 -0
- data/lib/datasets/penguins.rb +146 -0
- data/lib/datasets/postal-code-japan.rb +154 -0
- data/lib/datasets/rdatasets.rb +95 -0
- data/lib/datasets/table.rb +83 -3
- data/lib/datasets/tar_gz_readable.rb +14 -0
- data/lib/datasets/version.rb +1 -1
- data/lib/datasets/wikipedia.rb +2 -10
- data/red-datasets.gemspec +4 -0
- data/test/run-test.rb +2 -0
- data/test/test-cldr-plurals.rb +180 -0
- data/test/test-communities.rb +290 -0
- data/test/test-dataset.rb +27 -0
- data/test/test-downloader.rb +29 -0
- data/test/test-e-stat-japan.rb +383 -0
- data/test/test-hepatitis.rb +74 -0
- data/test/test-libsvm-dataset-list.rb +47 -0
- data/test/test-libsvm.rb +205 -0
- data/test/test-mushroom.rb +80 -0
- data/test/test-penguins.rb +251 -0
- data/test/test-postal-code-japan.rb +69 -0
- data/test/test-rdatasets.rb +136 -0
- data/test/test-table.rb +123 -18
- metadata +88 -11
@@ -0,0 +1,74 @@
|
|
1
|
+
# Tests for Datasets::Hepatitis: record enumeration and metadata.
class HepatitisTest < Test::Unit::TestCase
  def setup
    @dataset = Datasets::Hepatitis.new
  end

  # Convenience constructor for Datasets::Hepatitis::Record.
  def record(*args)
    Datasets::Hepatitis::Record.new(*args)
  end

  # Checks the number of records together with the first and the last
  # record, all in a single assertion.
  test("#each") do
    first_record = {
      label: :live,
      age: 30,
      sex: :female,
      steroid: false,
      antivirals: true,
      fatigue: true,
      malaise: true,
      anorexia: true,
      liver_big: false,
      liver_firm: true,
      spleen_palpable: true,
      spiders: true,
      ascites: true,
      varices: true,
      bilirubin: 1.0,
      alkaline_phosphate: 85,
      sgot: 18,
      albumin: 4.0,
      protime: nil,
      histology: false,
    }
    last_record = {
      label: :die,
      age: 43,
      sex: :male,
      steroid: true,
      antivirals: true,
      fatigue: false,
      malaise: true,
      anorexia: true,
      liver_big: true,
      liver_firm: true,
      spleen_palpable: false,
      spiders: false,
      ascites: false,
      varices: true,
      bilirubin: 1.2,
      alkaline_phosphate: 100,
      sgot: 19,
      albumin: 3.1,
      protime: 42,
      histology: true,
    }

    all_records = @dataset.each.to_a
    actual = [
      all_records.size,
      all_records[0].to_h,
      all_records[-1].to_h,
    ]
    assert_equal([155, first_record, last_record], actual)
  end

  sub_test_case("#metadata") do
    # The description is only checked by its leading text.
    test("#description") do
      text = @dataset.metadata.description
      assert { text.start_with?("1. Title: Hepatitis Domain") }
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# Tests for Datasets::LIBSVMDatasetList: first entry and metadata.
class LIBSVMDatasetListTest < Test::Unit::TestCase
  def setup
    @dataset = Datasets::LIBSVMDatasetList.new
  end

  # Compares the first listed dataset ("a1a") field by field.
  test("#each") do
    # Fragments keep their trailing spaces so that joining them yields
    # one continuous sentence.
    preprocessing_text = [
      "The original Adult data set has 14 features, ",
      "among which six are continuous and eight are ",
      "categorical. In this data set, continuous features ",
      "are discretized into quantiles, and each quantile is ",
      "represented by a binary feature. Also, a categorical ",
      "feature with m categories is converted to m binary ",
      "features. Details on how each feature is converted ",
      "can be found in the beginning of each file from this ",
      "page. [JP98a]",
    ].join

    training_file = {
      name: "a1a",
      url: "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/a1a",
      note: nil,
    }
    testing_file = {
      name: "a1a.t",
      url: "https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary/a1a.t",
      note: "testing",
    }

    expected = {
      name: "a1a",
      source: "UCI / Adult",
      preprocessing: preprocessing_text,
      n_classes: 2,
      n_data: 1605,
      n_features: 123,
      files: [training_file, testing_file],
    }
    assert_equal(expected, @dataset.first.to_h)
  end

  sub_test_case("#metadata") do
    # The description is only checked by its leading text.
    test("#description") do
      text = @dataset.metadata.description
      assert { text.start_with?("This page contains many classification, ") }
    end
  end
end
|
data/test/test-libsvm.rb
ADDED
@@ -0,0 +1,205 @@
|
|
1
|
+
# Tests for Datasets::LIBSVM. Every test compares the first record of a
# dataset, converted with #to_h, against a hash built by #expected_record.
class LIBSVMDatasetTest < Test::Unit::TestCase
  test(":note") do
    dataset = Datasets::LIBSVM.new("a1a", note: "testing")
    ones = [5, 7, 14, 19, 39, 40, 51, 63, 67, 73, 74, 76, 78, 83]
    assert_equal(expected_record(-1, 123, 0, ones.map { |index| [index, 1] }),
                 dataset.first.to_h)
  end

  test(":default_feature_value") do
    dataset = Datasets::LIBSVM.new("a1a", default_feature_value: nil)
    ones = [3, 11, 14, 19, 39, 42, 55, 64, 67, 73, 75, 76, 80, 83]
    assert_equal(expected_record(-1, 123, nil, ones.map { |index| [index, 1] }),
                 dataset.first.to_h)
  end

  test("classification") do
    dataset = Datasets::LIBSVM.new("a1a")
    ones = [3, 11, 14, 19, 39, 42, 55, 64, 67, 73, 75, 76, 80, 83]
    assert_equal(expected_record(-1, 123, 0, ones.map { |index| [index, 1] }),
                 dataset.first.to_h)
  end

  test("regression") do
    dataset = Datasets::LIBSVM.new("abalone")
    features = [
      [1, 1],
      [2, 0.455],
      [3, 0.365],
      [4, 0.095],
      [5, 0.514],
      [6, 0.2245],
      [7, 0.101],
      [8, 0.15],
    ]
    assert_equal(expected_record(15, 8, 0, features),
                 dataset.first.to_h)
  end

  test("multi-label") do
    dataset = Datasets::LIBSVM.new("mediamill (exp1)")
    features = [
      [1, 0.380877],
      [2, 0.494079],
      [3, 0.540009],
      [4, 0.422926],
      [5, 0.158318],
      [6, 0.326975],
      [7, 0.390861],
      [8, 0.527121],
      [9, 0.254052],
      [10, 0.223731],
      [11, 0.040285],
      [12, 0.141133],
      [13, 0.112249],
      [14, 0.263171],
      [15, 0.147020],
      [16, 0.472414],
      [17, 0.592614],
      [18, 0.653138],
      [19, 0.499867],
      [20, 0.196520],
      [21, 0.403892],
      [22, 0.482395],
      [23, 0.619219],
      [24, 0.320346],
      [25, 0.281251],
      [26, 0.054750],
      [27, 0.180459],
      [28, 0.139964],
      [29, 0.319925],
      [30, 0.181216],
      [31, 0.364294],
      [32, 0.407211],
      [33, 0.368926],
      [34, 0.427661],
      [35, 0.211391],
      [36, 0.364345],
      [37, 0.370710],
      [38, 0.409107],
      [39, 0.289299],
      [40, 0.243053],
      [41, 0.063121],
      [42, 0.193587],
      [43, 0.158755],
      [44, 0.316054],
      [45, 0.197410],
      [46, 0.656168],
      [47, 0.678760],
      [48, 0.650831],
      [49, 0.674636],
      [50, 0.492428],
      [51, 0.623887],
      [52, 0.610622],
      [53, 0.678219],
      [54, 0.574774],
      [55, 0.523073],
      [56, 0.206804],
      [57, 0.496294],
      [58, 0.429221],
      [59, 0.586611],
      [60, 0.471550],
      [61, 0.284480],
      [62, 0.432466],
      [63, 0.498075],
      [64, 0.408141],
      [65, 0.102713],
      [66, 0.303028],
      [67, 0.309501],
      [68, 0.444855],
      [69, 0.191727],
      [70, 0.174895],
      [71, 0.034143],
      [72, 0.153099],
      [73, 0.068318],
      [74, 0.217020],
      [75, 0.099688],
      [76, 0.409862],
      [77, 0.561918],
      [78, 0.612031],
      [79, 0.514471],
      [80, 0.146015],
      [81, 0.398807],
      [82, 0.383295],
      [83, 0.548485],
      [84, 0.282937],
      [85, 0.252712],
      [86, 0.051008],
      [87, 0.223110],
      [88, 0.098112],
      [89, 0.299672],
      [90, 0.144873],
      [91, 0.308488],
      [92, 0.358478],
      [93, 0.352077],
      [94, 0.394686],
      [95, 0.157513],
      [96, 0.339370],
      [97, 0.321558],
      [98, 0.341373],
      [99, 0.247969],
      [100, 0.206070],
      [101, 0.061001],
      [102, 0.216793],
      [103, 0.112389],
      [104, 0.273648],
      [105, 0.152745],
      [106, 0.598081],
      [107, 0.621687],
      [108, 0.607213],
      [109, 0.644025],
      [110, 0.394948],
      [111, 0.593651],
      [112, 0.551529],
      [113, 0.574392],
      [114, 0.511032],
      [115, 0.463997],
      [116, 0.202034],
      [117, 0.492341],
      [118, 0.317983],
      [119, 0.547807],
      [120, 0.393778],
    ]
    assert_equal(expected_record([65, 67, 11, 31], 120, 0, features),
                 dataset.first.to_h)
  end

  test("string") do
    # Pended explicitly: an empty test body passes vacuously, which would
    # hide the fact that this case is not covered yet.
    pend("TODO")
  end

  private

  # Builds the hash a record is expected to convert to with #to_h.
  #
  # label      - value stored under the :label key.
  # n_features - number of feature slots; they are keyed 0...n_features.
  # default    - value given to every feature slot not listed in features.
  # features   - array of [one_based_index, value] pairs; each value is
  #              stored under the zero-based key (one_based_index - 1).
  #
  # Returns the assembled Hash.
  def expected_record(label, n_features, default, features)
    expected = {label: label}
    n_features.times { |index| expected[index] = default }
    features.each { |index, value| expected[index - 1] = value }
    expected
  end
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
# Tests for Datasets::Mushroom: record enumeration and metadata.
class MushroomTest < Test::Unit::TestCase
  def setup
    @dataset = Datasets::Mushroom.new
  end

  # Convenience constructor for Datasets::Mushroom::Record.
  def record(*args)
    Datasets::Mushroom::Record.new(*args)
  end

  # Checks the number of records together with the first and the last
  # record, all in a single assertion.
  test("#each") do
    first_record = {
      label: "poisonous",
      cap_shape: "convex",
      cap_surface: "smooth",
      cap_color: "brown",
      bruises: "bruises",
      odor: "pungent",
      gill_attachment: "free",
      gill_spacing: "close",
      gill_size: "narrow",
      gill_color: "black",
      stalk_shape: "enlarging",
      stalk_root: "equal",
      stalk_surface_above_ring: "smooth",
      stalk_surface_below_ring: "smooth",
      stalk_color_above_ring: "white",
      stalk_color_below_ring: "white",
      veil_type: "partial",
      veil_color: "white",
      n_rings: 1,
      ring_type: "pendant",
      spore_print_color: "black",
      population: "scattered",
      habitat: "urban",
    }
    last_record = {
      label: "edible",
      cap_shape: "convex",
      cap_surface: "smooth",
      cap_color: "brown",
      bruises: "no",
      odor: "none",
      gill_attachment: "attached",
      gill_spacing: "close",
      gill_size: "broad",
      gill_color: "yellow",
      stalk_shape: "enlarging",
      stalk_root: "missing",
      stalk_surface_above_ring: "smooth",
      stalk_surface_below_ring: "smooth",
      stalk_color_above_ring: "orange",
      stalk_color_below_ring: "orange",
      veil_type: "partial",
      veil_color: "orange",
      n_rings: 1,
      ring_type: "pendant",
      spore_print_color: "orange",
      population: "clustered",
      habitat: "leaves",
    }

    all_records = @dataset.each.to_a
    actual = [
      all_records.size,
      all_records[0].to_h,
      all_records[-1].to_h,
    ]
    assert_equal([8124, first_record, last_record], actual)
  end

  sub_test_case("#metadata") do
    # The description is only checked by its leading text.
    test("#description") do
      text = @dataset.metadata.description
      assert { text.start_with?("1. Title: Mushroom Database") }
    end
  end
end
|
@@ -0,0 +1,251 @@
|
|
1
|
+
# Tests for the per-species Datasets::PenguinsRawData classes and for the
# combined Datasets::Penguins dataset.
class PenguinsTest < Test::Unit::TestCase
  sub_test_case("PenguinsRawData::SpeciesBase") do
    # Each species' data path is compared relative to its cache directory.
    test("#data_path") do
      species_classes = [
        Datasets::PenguinsRawData::Adelie,
        Datasets::PenguinsRawData::Gentoo,
        Datasets::PenguinsRawData::Chinstrap,
      ]
      relative_paths = species_classes.map do |species_class|
        raw_dataset = species_class.new
        data_path = raw_dataset.data_path
        data_path.relative_path_from(raw_dataset.send(:cache_dir_path)).to_s
      end
      assert_equal(%w[penguins/adelie.csv penguins/gentoo.csv penguins/chinstrap.csv],
                   relative_paths)
    end
  end

  sub_test_case("Adelie") do
    def setup
      @dataset = Datasets::PenguinsRawData::Adelie.new
    end

    # Checks the record count plus the first and last raw records.
    test("#each") do
      first_record = {
        study_name: "PAL0708",
        sample_number: 1,
        species: "Adelie Penguin (Pygoscelis adeliae)",
        region: "Anvers",
        island: "Torgersen",
        stage: "Adult, 1 Egg Stage",
        individual_id: "N1A1",
        clutch_completion: "Yes",
        date_egg: DateTime.new(2007, 11, 11),
        culmen_length_mm: 39.1,
        culmen_depth_mm: 18.7,
        flipper_length_mm: 181,
        body_mass_g: 3750,
        sex: "MALE",
        delta_15_n_permil: nil,
        delta_13_c_permil: nil,
        comments: "Not enough blood for isotopes.",
      }
      last_record = {
        study_name: "PAL0910",
        sample_number: 152,
        species: "Adelie Penguin (Pygoscelis adeliae)",
        region: "Anvers",
        island: "Dream",
        stage: "Adult, 1 Egg Stage",
        individual_id: "N85A2",
        clutch_completion: "Yes",
        date_egg: DateTime.new(2009, 11, 17),
        culmen_length_mm: 41.5,
        culmen_depth_mm: 18.5,
        flipper_length_mm: 201,
        body_mass_g: 4000,
        sex: "MALE",
        delta_15_n_permil: 8.89640,
        delta_13_c_permil: -26.06967,
        comments: nil,
      }

      all_records = @dataset.each.to_a
      actual = [
        all_records.size,
        all_records[0].to_h,
        all_records[-1].to_h,
      ]
      assert_equal([152, first_record, last_record], actual)
    end
  end

  sub_test_case("Gentoo") do
    def setup
      @dataset = Datasets::PenguinsRawData::Gentoo.new
    end

    # Checks the record count plus the first and last raw records.
    test("#each") do
      first_record = {
        study_name: "PAL0708",
        sample_number: 1,
        species: "Gentoo penguin (Pygoscelis papua)",
        region: "Anvers",
        island: "Biscoe",
        stage: "Adult, 1 Egg Stage",
        individual_id: "N31A1",
        clutch_completion: "Yes",
        date_egg: DateTime.new(2007, 11, 27),
        culmen_length_mm: 46.1,
        culmen_depth_mm: 13.2,
        flipper_length_mm: 211,
        body_mass_g: 4500,
        sex: "FEMALE",
        delta_15_n_permil: 7.993,
        delta_13_c_permil: -25.5139,
        comments: nil,
      }
      last_record = {
        study_name: "PAL0910",
        sample_number: 124,
        species: "Gentoo penguin (Pygoscelis papua)",
        region: "Anvers",
        island: "Biscoe",
        stage: "Adult, 1 Egg Stage",
        individual_id: "N43A2",
        clutch_completion: "Yes",
        date_egg: DateTime.new(2009, 11, 22),
        culmen_length_mm: 49.9,
        culmen_depth_mm: 16.1,
        flipper_length_mm: 213,
        body_mass_g: 5400,
        sex: "MALE",
        delta_15_n_permil: 8.3639,
        delta_13_c_permil: -26.15531,
        comments: nil,
      }

      all_records = @dataset.each.to_a
      actual = [
        all_records.size,
        all_records[0].to_h,
        all_records[-1].to_h,
      ]
      assert_equal([124, first_record, last_record], actual)
    end
  end

  sub_test_case("Chinstrap") do
    def setup
      @dataset = Datasets::PenguinsRawData::Chinstrap.new
    end

    # Checks the record count plus the first and last raw records.
    test("#each") do
      first_record = {
        study_name: "PAL0708",
        sample_number: 1,
        species: "Chinstrap penguin (Pygoscelis antarctica)",
        region: "Anvers",
        island: "Dream",
        stage: "Adult, 1 Egg Stage",
        individual_id: "N61A1",
        clutch_completion: "No",
        date_egg: DateTime.new(2007, 11, 19),
        culmen_length_mm: 46.5,
        culmen_depth_mm: 17.9,
        flipper_length_mm: 192,
        body_mass_g: 3500,
        sex: "FEMALE",
        delta_15_n_permil: 9.03935,
        delta_13_c_permil: -24.30229,
        comments: "Nest never observed with full clutch.",
      }
      last_record = {
        study_name: "PAL0910",
        sample_number: 68,
        species: "Chinstrap penguin (Pygoscelis antarctica)",
        region: "Anvers",
        island: "Dream",
        stage: "Adult, 1 Egg Stage",
        individual_id: "N100A2",
        clutch_completion: "Yes",
        date_egg: DateTime.new(2009, 11, 21),
        culmen_length_mm: 50.2,
        culmen_depth_mm: 18.7,
        flipper_length_mm: 198,
        body_mass_g: 3775,
        sex: "FEMALE",
        delta_15_n_permil: 9.39305,
        delta_13_c_permil: -24.25255,
        comments: nil,
      }

      all_records = @dataset.each.to_a
      actual = [
        all_records.size,
        all_records[0].to_h,
        all_records[-1].to_h,
      ]
      assert_equal([68, first_record, last_record], actual)
    end
  end

  sub_test_case("Penguins") do
    def setup
      @dataset = Datasets::Penguins.new
    end

    # Species names are compared in order of first appearance.
    test("order of species") do
      seen_species = @dataset.map(&:species).uniq
      assert_equal(%w[Adelie Chinstrap Gentoo], seen_species)
    end

    # Distinct non-nil sex values, sorted.
    test("data cleansing") do
      distinct_sexes = @dataset.map(&:sex).uniq.compact.sort
      assert_equal(%w[female male], distinct_sexes)
    end

    # Checks the record count plus the records at indexes 0, 152, 220
    # and -1.
    test("#each") do
      record_at_0 = {
        species: "Adelie",
        island: "Torgersen",
        bill_length_mm: 39.1,
        bill_depth_mm: 18.7,
        flipper_length_mm: 181,
        body_mass_g: 3750,
        sex: "male",
        year: 2007,
      }
      record_at_152 = {
        species: "Chinstrap",
        island: "Dream",
        bill_length_mm: 46.5,
        bill_depth_mm: 17.9,
        flipper_length_mm: 192,
        body_mass_g: 3500,
        sex: "female",
        year: 2007,
      }
      record_at_220 = {
        species: "Gentoo",
        island: "Biscoe",
        bill_length_mm: 46.1,
        bill_depth_mm: 13.2,
        flipper_length_mm: 211,
        body_mass_g: 4500,
        sex: "female",
        year: 2007,
      }
      record_at_end = {
        species: "Gentoo",
        island: "Biscoe",
        bill_length_mm: 49.9,
        bill_depth_mm: 16.1,
        flipper_length_mm: 213,
        body_mass_g: 5400,
        sex: "male",
        year: 2009,
      }

      all_records = @dataset.each.to_a
      actual = [
        all_records.size,
        all_records[0].to_h,
        all_records[152].to_h,
        all_records[220].to_h,
        all_records[-1].to_h,
      ]
      assert_equal([344, record_at_0, record_at_152, record_at_220, record_at_end],
                   actual)
    end
  end
end
|