red-datasets 0.0.8 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,80 @@
1
+ class MushroomTest < Test::Unit::TestCase
2
+ def setup
3
+ @dataset = Datasets::Mushroom.new
4
+ end
5
+
6
+ def record(*args)
7
+ Datasets::Mushroom::Record.new(*args)
8
+ end
9
+
10
+ test("#each") do
11
+ records = @dataset.each.to_a
12
+ assert_equal([
13
+ 8124,
14
+ {
15
+ :label => "poisonous",
16
+ :cap_shape => "convex",
17
+ :cap_surface => "smooth",
18
+ :cap_color => "brown",
19
+ :bruises => "bruises",
20
+ :odor => "pungent",
21
+ :gill_attachment => "free",
22
+ :gill_spacing => "close",
23
+ :gill_size => "narrow",
24
+ :gill_color => "black",
25
+ :stalk_shape => "enlarging",
26
+ :stalk_root => "equal",
27
+ :stalk_surface_above_ring => "smooth",
28
+ :stalk_surface_below_ring => "smooth",
29
+ :stalk_color_above_ring => "white",
30
+ :stalk_color_below_ring => "white",
31
+ :veil_type => "partial",
32
+ :veil_color => "white",
33
+ :n_rings => 1,
34
+ :ring_type => "pendant",
35
+ :spore_print_color => "black",
36
+ :population => "scattered",
37
+ :habitat => "urban"
38
+ },
39
+ {
40
+ :label => "edible",
41
+ :cap_shape => "convex",
42
+ :cap_surface => "smooth",
43
+ :cap_color => "brown",
44
+ :bruises => "no",
45
+ :odor => "none",
46
+ :gill_attachment => "attached",
47
+ :gill_spacing => "close",
48
+ :gill_size => "broad",
49
+ :gill_color => "yellow",
50
+ :stalk_shape => "enlarging",
51
+ :stalk_root => "missing",
52
+ :stalk_surface_above_ring => "smooth",
53
+ :stalk_surface_below_ring => "smooth",
54
+ :stalk_color_above_ring => "orange",
55
+ :stalk_color_below_ring => "orange",
56
+ :veil_type => "partial",
57
+ :veil_color => "orange",
58
+ :n_rings => 1,
59
+ :ring_type => "pendant",
60
+ :spore_print_color => "orange",
61
+ :population => "clustered",
62
+ :habitat => "leaves"
63
+ }
64
+ ],
65
+ [
66
+ records.size,
67
+ records[0].to_h,
68
+ records[-1].to_h
69
+ ])
70
+ end
71
+
72
+ sub_test_case("#metadata") do
73
+ test("#description") do
74
+ description = @dataset.metadata.description
75
+ assert do
76
+ description.start_with?("1. Title: Mushroom Database")
77
+ end
78
+ end
79
+ end
80
+ end
@@ -0,0 +1,251 @@
1
+ class PenguinsTest < Test::Unit::TestCase
2
+ sub_test_case("PenguinsRawData::SpeciesBase") do
3
+ test("#data_path") do
4
+ data_paths = [ Datasets::PenguinsRawData::Adelie,
5
+ Datasets::PenguinsRawData::Gentoo,
6
+ Datasets::PenguinsRawData::Chinstrap ].map {|cls|
7
+ dataset = cls.new
8
+ dataset.data_path.relative_path_from(dataset.send(:cache_dir_path)).to_s
9
+ }
10
+ assert_equal(["penguins/adelie.csv", "penguins/gentoo.csv", "penguins/chinstrap.csv"],
11
+ data_paths)
12
+ end
13
+ end
14
+
15
+ sub_test_case("Adelie") do
16
+ def setup
17
+ @dataset = Datasets::PenguinsRawData::Adelie.new
18
+ end
19
+
20
+ test("#each") do
21
+ records = @dataset.each.to_a
22
+ assert_equal([ 152,
23
+ {
24
+ study_name: "PAL0708",
25
+ sample_number: 1,
26
+ species: "Adelie Penguin (Pygoscelis adeliae)",
27
+ region: "Anvers",
28
+ island: "Torgersen",
29
+ stage: "Adult, 1 Egg Stage",
30
+ individual_id: "N1A1",
31
+ clutch_completion: "Yes",
32
+ date_egg: DateTime.new(2007, 11, 11),
33
+ culmen_length_mm: 39.1,
34
+ culmen_depth_mm: 18.7,
35
+ flipper_length_mm: 181,
36
+ body_mass_g: 3750,
37
+ sex: "MALE",
38
+ delta_15_n_permil: nil,
39
+ delta_13_c_permil: nil,
40
+ comments: "Not enough blood for isotopes."
41
+ },
42
+ {
43
+ study_name: "PAL0910",
44
+ sample_number: 152,
45
+ species: "Adelie Penguin (Pygoscelis adeliae)",
46
+ region: "Anvers",
47
+ island: "Dream",
48
+ stage: "Adult, 1 Egg Stage",
49
+ individual_id: "N85A2",
50
+ clutch_completion: "Yes",
51
+ date_egg: DateTime.new(2009, 11, 17),
52
+ culmen_length_mm: 41.5,
53
+ culmen_depth_mm: 18.5,
54
+ flipper_length_mm: 201,
55
+ body_mass_g: 4000,
56
+ sex: "MALE",
57
+ delta_15_n_permil: 8.89640,
58
+ delta_13_c_permil: -26.06967,
59
+ comments: nil
60
+ }
61
+ ],
62
+ [
63
+ records.size,
64
+ records[0].to_h,
65
+ records[-1].to_h
66
+ ])
67
+ end
68
+ end
69
+
70
+ sub_test_case("Gentoo") do
71
+ def setup
72
+ @dataset = Datasets::PenguinsRawData::Gentoo.new
73
+ end
74
+
75
+ test("#each") do
76
+ records = @dataset.each.to_a
77
+ assert_equal([ 124,
78
+ {
79
+ study_name: "PAL0708",
80
+ sample_number: 1,
81
+ species: "Gentoo penguin (Pygoscelis papua)",
82
+ region: "Anvers",
83
+ island: "Biscoe",
84
+ stage: "Adult, 1 Egg Stage",
85
+ individual_id: "N31A1",
86
+ clutch_completion: "Yes",
87
+ date_egg: DateTime.new(2007, 11, 27),
88
+ culmen_length_mm: 46.1,
89
+ culmen_depth_mm: 13.2,
90
+ flipper_length_mm: 211,
91
+ body_mass_g: 4500,
92
+ sex: "FEMALE",
93
+ delta_15_n_permil: 7.993,
94
+ delta_13_c_permil: -25.5139,
95
+ comments: nil
96
+ },
97
+ {
98
+ study_name: "PAL0910",
99
+ sample_number: 124,
100
+ species: "Gentoo penguin (Pygoscelis papua)",
101
+ region: "Anvers",
102
+ island: "Biscoe",
103
+ stage: "Adult, 1 Egg Stage",
104
+ individual_id: "N43A2",
105
+ clutch_completion: "Yes",
106
+ date_egg: DateTime.new(2009, 11, 22),
107
+ culmen_length_mm: 49.9,
108
+ culmen_depth_mm: 16.1,
109
+ flipper_length_mm: 213,
110
+ body_mass_g: 5400,
111
+ sex: "MALE",
112
+ delta_15_n_permil: 8.3639,
113
+ delta_13_c_permil: -26.15531,
114
+ comments: nil
115
+ }
116
+ ],
117
+ [
118
+ records.size,
119
+ records[0].to_h,
120
+ records[-1].to_h
121
+ ])
122
+ end
123
+ end
124
+
125
+ sub_test_case("Chinstrap") do
126
+ def setup
127
+ @dataset = Datasets::PenguinsRawData::Chinstrap.new
128
+ end
129
+
130
+ test("#each") do
131
+ records = @dataset.each.to_a
132
+ assert_equal([ 68,
133
+ {
134
+ study_name: "PAL0708",
135
+ sample_number: 1,
136
+ species: "Chinstrap penguin (Pygoscelis antarctica)",
137
+ region: "Anvers",
138
+ island: "Dream",
139
+ stage: "Adult, 1 Egg Stage",
140
+ individual_id: "N61A1",
141
+ clutch_completion: "No",
142
+ date_egg: DateTime.new(2007, 11, 19),
143
+ culmen_length_mm: 46.5,
144
+ culmen_depth_mm: 17.9,
145
+ flipper_length_mm: 192,
146
+ body_mass_g: 3500,
147
+ sex: "FEMALE",
148
+ delta_15_n_permil: 9.03935,
149
+ delta_13_c_permil: -24.30229,
150
+ comments: "Nest never observed with full clutch."
151
+ },
152
+ {
153
+ study_name: "PAL0910",
154
+ sample_number: 68,
155
+ species: "Chinstrap penguin (Pygoscelis antarctica)",
156
+ region: "Anvers",
157
+ island: "Dream",
158
+ stage: "Adult, 1 Egg Stage",
159
+ individual_id: "N100A2",
160
+ clutch_completion: "Yes",
161
+ date_egg: DateTime.new(2009, 11, 21),
162
+ culmen_length_mm: 50.2,
163
+ culmen_depth_mm: 18.7,
164
+ flipper_length_mm: 198,
165
+ body_mass_g: 3775,
166
+ sex: "FEMALE",
167
+ delta_15_n_permil: 9.39305,
168
+ delta_13_c_permil: -24.25255,
169
+ comments: nil
170
+ }
171
+ ],
172
+ [
173
+ records.size,
174
+ records[0].to_h,
175
+ records[-1].to_h
176
+ ])
177
+ end
178
+ end
179
+
180
+ sub_test_case("Penguins") do
181
+ def setup
182
+ @dataset = Datasets::Penguins.new
183
+ end
184
+
185
+ test("order of species") do
186
+ species_values = @dataset.map {|r| r.species }.uniq
187
+ assert_equal(["Adelie", "Chinstrap", "Gentoo"],
188
+ species_values)
189
+ end
190
+
191
+ test("data cleansing") do
192
+ sex_values = @dataset.map {|r| r.sex }.uniq.compact.sort
193
+ assert_equal(["female", "male"],
194
+ sex_values)
195
+ end
196
+
197
+ test("#each") do
198
+ records = @dataset.each.to_a
199
+ assert_equal([
200
+ 344,
201
+ {
202
+ species: "Adelie",
203
+ island: "Torgersen",
204
+ bill_length_mm: 39.1,
205
+ bill_depth_mm: 18.7,
206
+ flipper_length_mm: 181,
207
+ body_mass_g: 3750,
208
+ sex: "male",
209
+ year: 2007
210
+ },
211
+ {
212
+ species: "Chinstrap",
213
+ island: "Dream",
214
+ bill_length_mm: 46.5,
215
+ bill_depth_mm: 17.9,
216
+ flipper_length_mm: 192,
217
+ body_mass_g: 3500,
218
+ sex: "female",
219
+ year: 2007
220
+ },
221
+ {
222
+ species: "Gentoo",
223
+ island: "Biscoe",
224
+ bill_length_mm: 46.1,
225
+ bill_depth_mm: 13.2,
226
+ flipper_length_mm: 211,
227
+ body_mass_g: 4500,
228
+ sex: "female",
229
+ year: 2007
230
+ },
231
+ {
232
+ species: "Gentoo",
233
+ island: "Biscoe",
234
+ bill_length_mm: 49.9,
235
+ bill_depth_mm: 16.1,
236
+ flipper_length_mm: 213,
237
+ body_mass_g: 5400,
238
+ sex: "male",
239
+ year: 2009
240
+ }
241
+ ],
242
+ [
243
+ records.size,
244
+ records[0].to_h,
245
+ records[152].to_h,
246
+ records[220].to_h,
247
+ records[-1].to_h,
248
+ ])
249
+ end
250
+ end
251
+ end
@@ -0,0 +1,136 @@
1
+ class RdatasetsTest < Test::Unit::TestCase
2
+ sub_test_case("RdatasetsList") do
3
+ def setup
4
+ @dataset = Datasets::RdatasetsList.new
5
+ end
6
+
7
+ sub_test_case("#each") do
8
+ test("with package_name") do
9
+ records = @dataset.filter(package: "datasets").to_a
10
+ assert_equal([
11
+ 84,
12
+ {
13
+ package: "datasets",
14
+ dataset: "ability.cov",
15
+ title: "Ability and Intelligence Tests",
16
+ rows: 6,
17
+ cols: 8,
18
+ n_binary: 0,
19
+ n_character: 0,
20
+ n_factor: 0,
21
+ n_logical: 0,
22
+ n_numeric: 8,
23
+ csv: "https://vincentarelbundock.github.io/Rdatasets/csv/datasets/ability.cov.csv",
24
+ doc: "https://vincentarelbundock.github.io/Rdatasets/doc/datasets/ability.cov.html"
25
+ },
26
+ {
27
+ package: "datasets",
28
+ dataset: "WWWusage",
29
+ title: "Internet Usage per Minute",
30
+ rows: 100,
31
+ cols: 2,
32
+ n_binary: 0,
33
+ n_character: 0,
34
+ n_factor: 0,
35
+ n_logical: 0,
36
+ n_numeric: 2,
37
+ csv: "https://vincentarelbundock.github.io/Rdatasets/csv/datasets/WWWusage.csv",
38
+ doc: "https://vincentarelbundock.github.io/Rdatasets/doc/datasets/WWWusage.html"
39
+ }
40
+ ],
41
+ [
42
+ records.size,
43
+ records[0].to_h,
44
+ records[-1].to_h
45
+ ])
46
+ end
47
+
48
+ test("without package_name") do
49
+ records = @dataset.each.to_a
50
+ assert_equal([
51
+ 1714,
52
+ {
53
+ package: "AER",
54
+ dataset: "Affairs",
55
+ title: "Fair's Extramarital Affairs Data",
56
+ rows: 601,
57
+ cols: 9,
58
+ n_binary: 2,
59
+ n_character: 0,
60
+ n_factor: 2,
61
+ n_logical: 0,
62
+ n_numeric: 7,
63
+ csv: "https://vincentarelbundock.github.io/Rdatasets/csv/AER/Affairs.csv",
64
+ doc: "https://vincentarelbundock.github.io/Rdatasets/doc/AER/Affairs.html"
65
+ },
66
+ {
67
+ package: "vcd",
68
+ dataset: "WomenQueue",
69
+ title: "Women in Queues",
70
+ rows: 11,
71
+ cols: 2,
72
+ n_binary: 0,
73
+ n_character: 0,
74
+ n_factor: 1,
75
+ n_logical: 0,
76
+ n_numeric: 1,
77
+ csv: "https://vincentarelbundock.github.io/Rdatasets/csv/vcd/WomenQueue.csv",
78
+ doc: "https://vincentarelbundock.github.io/Rdatasets/doc/vcd/WomenQueue.html"
79
+ },
80
+ ],
81
+ [
82
+ records.size,
83
+ records[0].to_h,
84
+ records[-1].to_h
85
+ ])
86
+ end
87
+ end
88
+ end
89
+
90
+ sub_test_case("Rdatasets") do
91
+ sub_test_case("datasets") do
92
+ sub_test_case("AirPassengers") do
93
+ def setup
94
+ @dataset = Datasets::Rdatasets.new("datasets", "AirPassengers")
95
+ end
96
+
97
+ test("#each") do
98
+ records = @dataset.each.to_a
99
+ assert_equal([
100
+ 144,
101
+ { time: 1949, value: 112 },
102
+ { time: 1960.91666666667, value: 432 },
103
+ ],
104
+ [
105
+ records.size,
106
+ records[0],
107
+ records[-1]
108
+ ])
109
+ end
110
+
111
+ test("#metadata.id") do
112
+ assert_equal("rdatasets-datasets-AirPassengers", @dataset.metadata.id)
113
+ end
114
+
115
+ test("#metadata.description") do
116
+ description = @dataset.metadata.description
117
+ assert do
118
+ description.include?("Monthly Airline Passenger Numbers 1949-1960")
119
+ end
120
+ end
121
+ end
122
+
123
+ test("invalid dataset name") do
124
+ assert_raise(ArgumentError) do
125
+ Datasets::Rdatasets.new("datasets", "invalid datasets name")
126
+ end
127
+ end
128
+ end
129
+
130
+ test("invalid package name") do
131
+ assert_raise(ArgumentError) do
132
+ Datasets::Rdatasets.new("invalid package name", "AirPassengers")
133
+ end
134
+ end
135
+ end
136
+ end