red-datasets 0.0.8 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,80 @@
# Tests for Datasets::Mushroom.
#
# Checks the number of records yielded by #each, the hash form of the first
# and last records, and the beginning of the metadata description.
class MushroomTest < Test::Unit::TestCase
  def setup
    @dataset = Datasets::Mushroom.new
  end

  # Builds a Datasets::Mushroom::Record from the given positional values.
  def record(*args)
    Datasets::Mushroom::Record.new(*args)
  end

  test("#each") do
    records = @dataset.each.to_a

    # Expected hash form of the first record yielded by the dataset.
    expected_first = {
      label: "poisonous",
      cap_shape: "convex",
      cap_surface: "smooth",
      cap_color: "brown",
      bruises: "bruises",
      odor: "pungent",
      gill_attachment: "free",
      gill_spacing: "close",
      gill_size: "narrow",
      gill_color: "black",
      stalk_shape: "enlarging",
      stalk_root: "equal",
      stalk_surface_above_ring: "smooth",
      stalk_surface_below_ring: "smooth",
      stalk_color_above_ring: "white",
      stalk_color_below_ring: "white",
      veil_type: "partial",
      veil_color: "white",
      n_rings: 1,
      ring_type: "pendant",
      spore_print_color: "black",
      population: "scattered",
      habitat: "urban",
    }

    # Expected hash form of the last record yielded by the dataset.
    expected_last = {
      label: "edible",
      cap_shape: "convex",
      cap_surface: "smooth",
      cap_color: "brown",
      bruises: "no",
      odor: "none",
      gill_attachment: "attached",
      gill_spacing: "close",
      gill_size: "broad",
      gill_color: "yellow",
      stalk_shape: "enlarging",
      stalk_root: "missing",
      stalk_surface_above_ring: "smooth",
      stalk_surface_below_ring: "smooth",
      stalk_color_above_ring: "orange",
      stalk_color_below_ring: "orange",
      veil_type: "partial",
      veil_color: "orange",
      n_rings: 1,
      ring_type: "pendant",
      spore_print_color: "orange",
      population: "clustered",
      habitat: "leaves",
    }

    assert_equal([8124, expected_first, expected_last],
                 [records.size, records.first.to_h, records.last.to_h])
  end

  sub_test_case("#metadata") do
    test("#description") do
      description = @dataset.metadata.description
      assert { description.start_with?("1. Title: Mushroom Database") }
    end
  end
end
@@ -0,0 +1,251 @@
# Tests for the penguins datasets.
#
# Covers the per-species raw data classes under Datasets::PenguinsRawData
# (cache-relative data path, record count, first/last record) and the
# combined Datasets::Penguins dataset (species order, sex values, and
# selected records).
class PenguinsTest < Test::Unit::TestCase
  sub_test_case("PenguinsRawData::SpeciesBase") do
    test("#data_path") do
      species_classes = [
        Datasets::PenguinsRawData::Adelie,
        Datasets::PenguinsRawData::Gentoo,
        Datasets::PenguinsRawData::Chinstrap,
      ]
      # Express each data path relative to the dataset's cache directory so
      # the assertion does not depend on where the cache lives.
      data_paths = species_classes.map do |species_class|
        dataset = species_class.new
        data_path = dataset.data_path
        data_path.relative_path_from(dataset.send(:cache_dir_path)).to_s
      end
      assert_equal(%w[penguins/adelie.csv penguins/gentoo.csv penguins/chinstrap.csv],
                   data_paths)
    end
  end

  sub_test_case("Adelie") do
    def setup
      @dataset = Datasets::PenguinsRawData::Adelie.new
    end

    test("#each") do
      records = @dataset.each.to_a

      expected_first = {
        study_name: "PAL0708",
        sample_number: 1,
        species: "Adelie Penguin (Pygoscelis adeliae)",
        region: "Anvers",
        island: "Torgersen",
        stage: "Adult, 1 Egg Stage",
        individual_id: "N1A1",
        clutch_completion: "Yes",
        date_egg: DateTime.new(2007, 11, 11),
        culmen_length_mm: 39.1,
        culmen_depth_mm: 18.7,
        flipper_length_mm: 181,
        body_mass_g: 3750,
        sex: "MALE",
        delta_15_n_permil: nil,
        delta_13_c_permil: nil,
        comments: "Not enough blood for isotopes.",
      }
      expected_last = {
        study_name: "PAL0910",
        sample_number: 152,
        species: "Adelie Penguin (Pygoscelis adeliae)",
        region: "Anvers",
        island: "Dream",
        stage: "Adult, 1 Egg Stage",
        individual_id: "N85A2",
        clutch_completion: "Yes",
        date_egg: DateTime.new(2009, 11, 17),
        culmen_length_mm: 41.5,
        culmen_depth_mm: 18.5,
        flipper_length_mm: 201,
        body_mass_g: 4000,
        sex: "MALE",
        delta_15_n_permil: 8.89640,
        delta_13_c_permil: -26.06967,
        comments: nil,
      }

      assert_equal([152, expected_first, expected_last],
                   [records.size, records.first.to_h, records.last.to_h])
    end
  end

  sub_test_case("Gentoo") do
    def setup
      @dataset = Datasets::PenguinsRawData::Gentoo.new
    end

    test("#each") do
      records = @dataset.each.to_a

      expected_first = {
        study_name: "PAL0708",
        sample_number: 1,
        species: "Gentoo penguin (Pygoscelis papua)",
        region: "Anvers",
        island: "Biscoe",
        stage: "Adult, 1 Egg Stage",
        individual_id: "N31A1",
        clutch_completion: "Yes",
        date_egg: DateTime.new(2007, 11, 27),
        culmen_length_mm: 46.1,
        culmen_depth_mm: 13.2,
        flipper_length_mm: 211,
        body_mass_g: 4500,
        sex: "FEMALE",
        delta_15_n_permil: 7.993,
        delta_13_c_permil: -25.5139,
        comments: nil,
      }
      expected_last = {
        study_name: "PAL0910",
        sample_number: 124,
        species: "Gentoo penguin (Pygoscelis papua)",
        region: "Anvers",
        island: "Biscoe",
        stage: "Adult, 1 Egg Stage",
        individual_id: "N43A2",
        clutch_completion: "Yes",
        date_egg: DateTime.new(2009, 11, 22),
        culmen_length_mm: 49.9,
        culmen_depth_mm: 16.1,
        flipper_length_mm: 213,
        body_mass_g: 5400,
        sex: "MALE",
        delta_15_n_permil: 8.3639,
        delta_13_c_permil: -26.15531,
        comments: nil,
      }

      assert_equal([124, expected_first, expected_last],
                   [records.size, records.first.to_h, records.last.to_h])
    end
  end

  sub_test_case("Chinstrap") do
    def setup
      @dataset = Datasets::PenguinsRawData::Chinstrap.new
    end

    test("#each") do
      records = @dataset.each.to_a

      expected_first = {
        study_name: "PAL0708",
        sample_number: 1,
        species: "Chinstrap penguin (Pygoscelis antarctica)",
        region: "Anvers",
        island: "Dream",
        stage: "Adult, 1 Egg Stage",
        individual_id: "N61A1",
        clutch_completion: "No",
        date_egg: DateTime.new(2007, 11, 19),
        culmen_length_mm: 46.5,
        culmen_depth_mm: 17.9,
        flipper_length_mm: 192,
        body_mass_g: 3500,
        sex: "FEMALE",
        delta_15_n_permil: 9.03935,
        delta_13_c_permil: -24.30229,
        comments: "Nest never observed with full clutch.",
      }
      expected_last = {
        study_name: "PAL0910",
        sample_number: 68,
        species: "Chinstrap penguin (Pygoscelis antarctica)",
        region: "Anvers",
        island: "Dream",
        stage: "Adult, 1 Egg Stage",
        individual_id: "N100A2",
        clutch_completion: "Yes",
        date_egg: DateTime.new(2009, 11, 21),
        culmen_length_mm: 50.2,
        culmen_depth_mm: 18.7,
        flipper_length_mm: 198,
        body_mass_g: 3775,
        sex: "FEMALE",
        delta_15_n_permil: 9.39305,
        delta_13_c_permil: -24.25255,
        comments: nil,
      }

      assert_equal([68, expected_first, expected_last],
                   [records.size, records.first.to_h, records.last.to_h])
    end
  end

  sub_test_case("Penguins") do
    def setup
      @dataset = Datasets::Penguins.new
    end

    test("order of species") do
      species_values = @dataset.map(&:species).uniq
      assert_equal(%w[Adelie Chinstrap Gentoo], species_values)
    end

    test("data cleansing") do
      # nil sex values are dropped before sorting.
      sex_values = @dataset.map(&:sex).uniq.compact.sort
      assert_equal(%w[female male], sex_values)
    end

    test("#each") do
      records = @dataset.each.to_a

      # Expected record at index 0.
      adelie_first = {
        species: "Adelie",
        island: "Torgersen",
        bill_length_mm: 39.1,
        bill_depth_mm: 18.7,
        flipper_length_mm: 181,
        body_mass_g: 3750,
        sex: "male",
        year: 2007,
      }
      # Expected record at index 152.
      chinstrap_sample = {
        species: "Chinstrap",
        island: "Dream",
        bill_length_mm: 46.5,
        bill_depth_mm: 17.9,
        flipper_length_mm: 192,
        body_mass_g: 3500,
        sex: "female",
        year: 2007,
      }
      # Expected record at index 220.
      gentoo_sample = {
        species: "Gentoo",
        island: "Biscoe",
        bill_length_mm: 46.1,
        bill_depth_mm: 13.2,
        flipper_length_mm: 211,
        body_mass_g: 4500,
        sex: "female",
        year: 2007,
      }
      # Expected record at the last index.
      gentoo_last = {
        species: "Gentoo",
        island: "Biscoe",
        bill_length_mm: 49.9,
        bill_depth_mm: 16.1,
        flipper_length_mm: 213,
        body_mass_g: 5400,
        sex: "male",
        year: 2009,
      }

      expected = [344, adelie_first, chinstrap_sample, gentoo_sample, gentoo_last]
      actual = [
        records.size,
        records.first.to_h,
        records[152].to_h,
        records[220].to_h,
        records.last.to_h,
      ]
      assert_equal(expected, actual)
    end
  end
end
@@ -0,0 +1,136 @@
# Tests for the Rdatasets integration.
#
# Covers Datasets::RdatasetsList (iteration with and without a package
# filter) and Datasets::Rdatasets (records and metadata of the
# "datasets"/"AirPassengers" dataset, plus ArgumentError on invalid package
# or dataset names).
class RdatasetsTest < Test::Unit::TestCase
  sub_test_case("RdatasetsList") do
    def setup
      @dataset = Datasets::RdatasetsList.new
    end

    sub_test_case("#each") do
      test("with package_name") do
        records = @dataset.filter(package: "datasets").to_a

        expected_first = {
          package: "datasets",
          dataset: "ability.cov",
          title: "Ability and Intelligence Tests",
          rows: 6,
          cols: 8,
          n_binary: 0,
          n_character: 0,
          n_factor: 0,
          n_logical: 0,
          n_numeric: 8,
          csv: "https://vincentarelbundock.github.io/Rdatasets/csv/datasets/ability.cov.csv",
          doc: "https://vincentarelbundock.github.io/Rdatasets/doc/datasets/ability.cov.html",
        }
        expected_last = {
          package: "datasets",
          dataset: "WWWusage",
          title: "Internet Usage per Minute",
          rows: 100,
          cols: 2,
          n_binary: 0,
          n_character: 0,
          n_factor: 0,
          n_logical: 0,
          n_numeric: 2,
          csv: "https://vincentarelbundock.github.io/Rdatasets/csv/datasets/WWWusage.csv",
          doc: "https://vincentarelbundock.github.io/Rdatasets/doc/datasets/WWWusage.html",
        }

        assert_equal([84, expected_first, expected_last],
                     [records.size, records.first.to_h, records.last.to_h])
      end

      test("without package_name") do
        records = @dataset.each.to_a

        expected_first = {
          package: "AER",
          dataset: "Affairs",
          title: "Fair's Extramarital Affairs Data",
          rows: 601,
          cols: 9,
          n_binary: 2,
          n_character: 0,
          n_factor: 2,
          n_logical: 0,
          n_numeric: 7,
          csv: "https://vincentarelbundock.github.io/Rdatasets/csv/AER/Affairs.csv",
          doc: "https://vincentarelbundock.github.io/Rdatasets/doc/AER/Affairs.html",
        }
        expected_last = {
          package: "vcd",
          dataset: "WomenQueue",
          title: "Women in Queues",
          rows: 11,
          cols: 2,
          n_binary: 0,
          n_character: 0,
          n_factor: 1,
          n_logical: 0,
          n_numeric: 1,
          csv: "https://vincentarelbundock.github.io/Rdatasets/csv/vcd/WomenQueue.csv",
          doc: "https://vincentarelbundock.github.io/Rdatasets/doc/vcd/WomenQueue.html",
        }

        assert_equal([1714, expected_first, expected_last],
                     [records.size, records.first.to_h, records.last.to_h])
      end
    end
  end

  sub_test_case("Rdatasets") do
    sub_test_case("datasets") do
      sub_test_case("AirPassengers") do
        def setup
          @dataset = Datasets::Rdatasets.new("datasets", "AirPassengers")
        end

        test("#each") do
          records = @dataset.each.to_a
          # Records are compared directly here, without a #to_h conversion.
          expected = [
            144,
            { time: 1949, value: 112 },
            { time: 1960.91666666667, value: 432 },
          ]
          assert_equal(expected,
                       [records.size, records.first, records.last])
        end

        test("#metadata.id") do
          assert_equal("rdatasets-datasets-AirPassengers", @dataset.metadata.id)
        end

        test("#metadata.description") do
          description = @dataset.metadata.description
          assert { description.include?("Monthly Airline Passenger Numbers 1949-1960") }
        end
      end

      test("invalid dataset name") do
        assert_raise(ArgumentError) { Datasets::Rdatasets.new("datasets", "invalid datasets name") }
      end
    end

    test("invalid package name") do
      assert_raise(ArgumentError) { Datasets::Rdatasets.new("invalid package name", "AirPassengers") }
    end
  end
end