red-datasets 0.0.7 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +20 -4
- data/doc/text/news.md +102 -0
- data/lib/datasets.rb +19 -9
- data/lib/datasets/adult.rb +4 -3
- data/lib/datasets/cifar.rb +4 -12
- data/lib/datasets/cldr-plurals.rb +385 -0
- data/lib/datasets/communities.rb +198 -0
- data/lib/datasets/dataset.rb +20 -1
- data/lib/datasets/downloader.rb +54 -26
- data/lib/datasets/e-stat-japan.rb +320 -0
- data/lib/datasets/error.rb +4 -0
- data/lib/datasets/hepatitis.rb +207 -0
- data/lib/datasets/libsvm-dataset-list.rb +277 -0
- data/lib/datasets/libsvm.rb +135 -0
- data/lib/datasets/mnist.rb +0 -2
- data/lib/datasets/mushroom.rb +256 -0
- data/lib/datasets/penguins.rb +146 -0
- data/lib/datasets/postal-code-japan.rb +154 -0
- data/lib/datasets/rdatasets.rb +95 -0
- data/lib/datasets/table.rb +83 -3
- data/lib/datasets/tar_gz_readable.rb +14 -0
- data/lib/datasets/version.rb +1 -1
- data/lib/datasets/wikipedia.rb +2 -10
- data/red-datasets.gemspec +4 -0
- data/test/run-test.rb +2 -0
- data/test/test-cldr-plurals.rb +180 -0
- data/test/test-communities.rb +290 -0
- data/test/test-dataset.rb +27 -0
- data/test/test-downloader.rb +29 -0
- data/test/test-e-stat-japan.rb +383 -0
- data/test/test-hepatitis.rb +74 -0
- data/test/test-libsvm-dataset-list.rb +47 -0
- data/test/test-libsvm.rb +205 -0
- data/test/test-mushroom.rb +80 -0
- data/test/test-penguins.rb +251 -0
- data/test/test-postal-code-japan.rb +69 -0
- data/test/test-rdatasets.rb +136 -0
- data/test/test-table.rb +123 -18
- metadata +88 -11
@@ -0,0 +1,290 @@
|
|
1
|
+
# Tests for Datasets::Communities: checks the number of records and the
# exact contents of the first and last records, plus the start of the
# metadata description.
class CommunitiesTest < Test::Unit::TestCase
  def setup
    @dataset = Datasets::Communities.new
  end

  # Convenience constructor for a Datasets::Communities::Record.
  def record(*args)
    Datasets::Communities::Record.new(*args)
  end

  test('#each') do
    # Expected contents of the first record.
    first_expected = {
      state: 8,
      county: nil,
      community: nil,
      community_name: "Lakewoodcity",
      fold: 1,
      population: 0.19,
      household_size: 0.33,
      race_percent_black: 0.02,
      race_percent_white: 0.9,
      race_percent_asian: 0.12,
      race_percent_hispanic: 0.17,
      age_percent_12_to_21: 0.34,
      age_percent_12_to_29: 0.47,
      age_percent_16_to_24: 0.29,
      age_percent_65_and_upper: 0.32,
      n_people_urban: 0.2,
      percent_people_urban: 1,
      median_income: 0.37,
      percent_households_with_wage: 0.72,
      percent_households_with_farm_self: 0.34,
      percent_households_with_investment_income: 0.6,
      percent_households_with_social_security: 0.29,
      percent_households_with_public_assistant: 0.15,
      percent_households_with_retire: 0.43,
      median_family_income: 0.39,
      per_capita_income: 0.4,
      per_capita_income_white: 0.39,
      per_capita_income_black: 0.32,
      per_capita_income_indian: 0.27,
      per_capita_income_asian: 0.27,
      per_capita_income_other: 0.36,
      per_capita_income_hispanic: 0.41,
      n_people_under_poverty: 0.08,
      percent_people_under_poverty: 0.19,
      percent_less_9th_grade: 0.1,
      percent_not_high_school_graduate: 0.18,
      percent_bachelors_or_more: 0.48,
      percent_unemployed: 0.27,
      percent_employed: 0.68,
      percent_employed_manufacturing: 0.23,
      percent_employed_professional_service: 0.41,
      percent_occupations_manufacturing: 0.25,
      percent_occupations_management_professional: 0.52,
      male_percent_divorced: 0.68,
      male_percent_never_married: 0.4,
      female_percent_divorced: 0.75,
      total_percent_divorced: 0.75,
      mean_persons_per_family: 0.35,
      percent_family_2_parents: 0.55,
      percent_kids_2_parents: 0.59,
      percent_young_kids_2_parents: 0.61,
      percent_teen_2_parents: 0.56,
      percent_work_mom_young_kids: 0.74,
      percent_work_mom: 0.76,
      n_illegals: 0.04,
      percent_illegals: 0.14,
      n_immigrants: 0.03,
      percent_immigrants_recent: 0.24,
      percent_immigrants_recent_5: 0.27,
      percent_immigrants_recent_8: 0.37,
      percent_immigrants_recent_10: 0.39,
      percent_population_immigranted_recent: 0.07,
      percent_population_immigranted_recent_5: 0.07,
      percent_population_immigranted_recent_8: 0.08,
      percent_population_immigranted_recent_10: 0.08,
      percent_speak_english_only: 0.89,
      percent_not_speak_english_well: 0.06,
      percent_large_households_family: 0.14,
      percent_large_households_occupied: 0.13,
      mean_persons_per_occupied_household: 0.33,
      mean_persons_per_owner_occupied_household: 0.39,
      mean_persons_per_rental_occupied_household: 0.28,
      percent_persons_owner_occupied_household: 0.55,
      percent_persons_dense_housing: 0.09,
      percent_housing_less_3_bedrooms: 0.51,
      median_n_bedrooms: 0.5,
      n_vacant_households: 0.21,
      percent_housing_occupied: 0.71,
      percent_housing_owner_occupied: 0.52,
      percent_vacant_housing_boarded: 0.05,
      percent_vacant_housing_more_6_months: 0.26,
      median_year_housing_built: 0.65,
      percent_housing_no_phone: 0.14,
      percent_housing_without_full_plumbing: 0.06,
      owner_occupied_housing_lower_quartile: 0.22,
      owner_occupied_housing_median: 0.19,
      owner_occupied_housing_higher_quartile: 0.18,
      rental_housing_lower_quartile: 0.36,
      rental_housing_median: 0.35,
      rental_housing_higher_quartile: 0.38,
      median_rent: 0.34,
      median_rent_percent_household_income: 0.38,
      median_owner_cost_percent_household_income: 0.46,
      median_owner_cost_percent_household_income_no_mortgage: 0.25,
      n_people_shelter: 0.04,
      n_people_street: 0,
      percent_foreign_born: 0.12,
      percent_born_same_state: 0.42,
      percent_same_house_85: 0.5,
      percent_same_city_85: 0.51,
      percent_same_state_85: 0.64,
      lemas_sworn_full_time: 0.03,
      lemas_sworn_full_time_per_population: 0.13,
      lemas_sworn_full_time_field: 0.96,
      lemas_sworn_full_time_field_per_population: 0.17,
      lemas_total_requests: 0.06,
      lemas_total_requests_per_population: 0.18,
      total_requests_per_officer: 0.44,
      n_officers_per_population: 0.13,
      racial_match_community_police: 0.94,
      percent_police_white: 0.93,
      percent_police_black: 0.03,
      percent_police_hispanic: 0.07,
      percent_police_asian: 0.1,
      percent_police_minority: 0.07,
      n_officers_assigned_drug_units: 0.02,
      n_kinds_drugs_seized: 0.57,
      police_average_overtime_worked: 0.29,
      land_area: 0.12,
      population_density: 0.26,
      percent_use_public_transit: 0.2,
      n_police_cars: 0.06,
      n_police_operating_budget: 0.04,
      lemas_percent_police_on_patrol: 0.9,
      lemas_gang_unit_deployed: 0.5,
      lemas_percent_office_drug_units: 0.32,
      police_operating_budget_per_population: 0.14,
      total_violent_crimes_per_population: 0.2
    }

    # Expected contents of the last record.
    last_expected = {
      state: 6,
      county: nil,
      community: nil,
      community_name: "Ontariocity",
      fold: 10,
      population: 0.2,
      household_size: 0.78,
      race_percent_black: 0.14,
      race_percent_white: 0.46,
      race_percent_asian: 0.24,
      race_percent_hispanic: 0.77,
      age_percent_12_to_21: 0.5,
      age_percent_12_to_29: 0.62,
      age_percent_16_to_24: 0.4,
      age_percent_65_and_upper: 0.17,
      n_people_urban: 0.21,
      percent_people_urban: 1,
      median_income: 0.4,
      percent_households_with_wage: 0.73,
      percent_households_with_farm_self: 0.22,
      percent_households_with_investment_income: 0.25,
      percent_households_with_social_security: 0.26,
      percent_households_with_public_assistant: 0.47,
      percent_households_with_retire: 0.29,
      median_family_income: 0.36,
      per_capita_income: 0.24,
      per_capita_income_white: 0.28,
      per_capita_income_black: 0.32,
      per_capita_income_indian: 0.22,
      per_capita_income_asian: 0.27,
      per_capita_income_other: 0.25,
      per_capita_income_hispanic: 0.29,
      n_people_under_poverty: 0.16,
      percent_people_under_poverty: 0.35,
      percent_less_9th_grade: 0.5,
      percent_not_high_school_graduate: 0.55,
      percent_bachelors_or_more: 0.16,
      percent_unemployed: 0.47,
      percent_employed: 0.58,
      percent_employed_manufacturing: 0.53,
      percent_employed_professional_service: 0.2,
      percent_occupations_manufacturing: 0.6,
      percent_occupations_management_professional: 0.24,
      male_percent_divorced: 0.49,
      male_percent_never_married: 0.5,
      female_percent_divorced: 0.6,
      total_percent_divorced: 0.57,
      mean_persons_per_family: 0.86,
      percent_family_2_parents: 0.61,
      percent_kids_2_parents: 0.59,
      percent_young_kids_2_parents: 0.64,
      percent_teen_2_parents: 0.6,
      percent_work_mom_young_kids: 0.35,
      percent_work_mom: 0.35,
      n_illegals: 0.11,
      percent_illegals: 0.43,
      n_immigrants: 0.2,
      percent_immigrants_recent: 0.43,
      percent_immigrants_recent_5: 0.52,
      percent_immigrants_recent_8: 0.58,
      percent_immigrants_recent_10: 0.65,
      percent_population_immigranted_recent: 0.68,
      percent_population_immigranted_recent_5: 0.73,
      percent_population_immigranted_recent_8: 0.73,
      percent_population_immigranted_recent_10: 0.75,
      percent_speak_english_only: 0.35,
      percent_not_speak_english_well: 0.77,
      percent_large_households_family: 0.84,
      percent_large_households_occupied: 0.83,
      mean_persons_per_occupied_household: 0.84,
      mean_persons_per_owner_occupied_household: 0.78,
      mean_persons_per_rental_occupied_household: 0.89,
      percent_persons_owner_occupied_household: 0.46,
      percent_persons_dense_housing: 0.76,
      percent_housing_less_3_bedrooms: 0.55,
      median_n_bedrooms: 0,
      n_vacant_households: 0.12,
      percent_housing_occupied: 0.8,
      percent_housing_owner_occupied: 0.49,
      percent_vacant_housing_boarded: 0.1,
      percent_vacant_housing_more_6_months: 0.16,
      median_year_housing_built: 0.71,
      percent_housing_no_phone: 0.32,
      percent_housing_without_full_plumbing: 0.21,
      owner_occupied_housing_lower_quartile: 0.35,
      owner_occupied_housing_median: 0.35,
      owner_occupied_housing_higher_quartile: 0.32,
      rental_housing_lower_quartile: 0.54,
      rental_housing_median: 0.53,
      rental_housing_higher_quartile: 0.61,
      median_rent: 0.54,
      median_rent_percent_household_income: 0.69,
      median_owner_cost_percent_household_income: 0.73,
      median_owner_cost_percent_household_income_no_mortgage: 0.21,
      n_people_shelter: 0.08,
      n_people_street: 0.08,
      percent_foreign_born: 0.68,
      percent_born_same_state: 0.5,
      percent_same_house_85: 0.34,
      percent_same_city_85: 0.35,
      percent_same_state_85: 0.68,
      lemas_sworn_full_time: 0.03,
      lemas_sworn_full_time_per_population: 0.11,
      lemas_sworn_full_time_field: 0.96,
      lemas_sworn_full_time_field_per_population: 0.14,
      lemas_total_requests: 0.04,
      lemas_total_requests_per_population: 0.11,
      total_requests_per_officer: 0.3,
      n_officers_per_population: 0.11,
      racial_match_community_police: 0.45,
      percent_police_white: 0.74,
      percent_police_black: 0.34,
      percent_police_hispanic: 0.07,
      percent_police_asian: 0,
      percent_police_minority: 0.28,
      n_officers_assigned_drug_units: 0.07,
      n_kinds_drugs_seized: 0.36,
      police_average_overtime_worked: 0.11,
      land_area: 0.11,
      population_density: 0.3,
      percent_use_public_transit: 0.05,
      n_police_cars: 0.08,
      n_police_operating_budget: 0.04,
      lemas_percent_police_on_patrol: 0.73,
      lemas_gang_unit_deployed: 0.5,
      lemas_percent_office_drug_units: 1,
      police_operating_budget_per_population: 0.13,
      total_violent_crimes_per_population: 0.48
    }

    all_records = @dataset.each.to_a
    # Compare count, first and last in a single assertion so one failure
    # shows all three at once.
    assert_equal([1994, first_expected, last_expected],
                 [all_records.size, all_records.first.to_h, all_records.last.to_h])
  end

  sub_test_case("#metadata") do
    test("#description") do
      description = @dataset.metadata.description
      assert do
        description.start_with?("Title: Communities and Crime")
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# Tests for Dataset#clear_cache!, exercised through Datasets::Iris.
class TestDataset < Test::Unit::TestCase
  sub_test_case("#clear_cache!") do
    def setup
      @dataset = Datasets::Iris.new
      # cache_dir_path is reached through send rather than a direct call.
      @cache_dir_path = @dataset.send(:cache_dir_path)
    end

    test("when the dataset is downloaded") do
      cached_csv = @cache_dir_path.join("iris.csv")

      # Reading the first record forces the dataset to be downloaded.
      @dataset.first
      existed_before = cached_csv.exist?

      @dataset.clear_cache!
      existed_after = cached_csv.exist?

      assert_equal({before: true, after: false},
                   {before: existed_before, after: existed_after})
    end

    test("when the dataset is not downloaded") do
      # Make sure there is nothing cached before clearing.
      if @cache_dir_path.exist?
        FileUtils.rmtree(@cache_dir_path.to_s, secure: true)
      end

      assert_nothing_raised { @dataset.clear_cache! }
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# Tests for Datasets::Downloader#download, run inside the sandbox
# provided by Helper::Sandbox.
class DownloaderTest < Test::Unit::TestCase
  include Helper::Sandbox

  sub_test_case("#download") do
    def setup
      setup_sandbox
    end

    def teardown
      teardown_sandbox
    end

    test("too many redirection") do
      origin = "https://example.com/file"
      final_hop = "https://example.com/last_redirection"
      destination = @tmp_dir + "file"

      # The stub below reports only the last URL; the error that reaches
      # the caller is expected to name both the first and the last URL.
      expected_error =
        Datasets::Downloader::TooManyRedirects.new("too many redirections: #{origin} .. #{final_hop}")

      downloader = Datasets::Downloader.new(origin)
      # Replace the HTTP step so no network access happens.
      downloader.define_singleton_method(:start_http) do |_url, _headers|
        raise Datasets::Downloader::TooManyRedirects, "too many redirections: #{final_hop}"
      end

      assert_raise(expected_error) do
        downloader.download(destination)
      end
    end
  end
end
|
@@ -0,0 +1,383 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'pathname'
|
4
|
+
require 'tmpdir'
|
5
|
+
|
6
|
+
class EStatJapanTest < Test::Unit::TestCase
|
7
|
+
# How StatsData resolves its app_id. The tests below pin the precedence:
# constructor argument > Datasets::EStatJapan.configure > ESTATJAPAN_APP_ID.
sub_test_case('app_id') do
  # Start every test with no app_id configured anywhere, remembering the
  # environment value so it can be put back afterwards.
  def setup
    @original_env_app_id = ENV['ESTATJAPAN_APP_ID']
    ENV['ESTATJAPAN_APP_ID'] = nil
    Datasets::EStatJapan.app_id = nil
  end

  # These tests mutate process-wide state (ENV and the module-level
  # app_id); undo that so it cannot leak into tests that run later.
  def teardown
    ENV['ESTATJAPAN_APP_ID'] = @original_env_app_id
    Datasets::EStatJapan.app_id = nil
  end

  test('nothing') do
    assert_raise(Datasets::EStatJapan::ArgumentError) do
      Datasets::EStatJapan::StatsData.new('test-data-id')
    end
  end

  test('constructor') do
    stats_data = Datasets::EStatJapan::StatsData.new('test-data-id', app_id: 'test_by_constructor')
    assert_equal('test_by_constructor', stats_data.app_id)
  end

  test('env') do
    ENV['ESTATJAPAN_APP_ID'] = 'test_by_env'
    stats_data = Datasets::EStatJapan::StatsData.new('test-data-id')
    assert_equal('test_by_env', stats_data.app_id)
  end

  test('configure') do
    Datasets::EStatJapan.configure do |config|
      config.app_id = 'test_by_configure'
    end
    stats_data = Datasets::EStatJapan::StatsData.new('test-data-id')
    assert_equal('test_by_configure', stats_data.app_id)
  end

  test('env & configure') do
    ENV['ESTATJAPAN_APP_ID'] = 'test_by_env'
    Datasets::EStatJapan.configure do |config|
      config.app_id = 'test_by_configure'
    end
    stats_data = Datasets::EStatJapan::StatsData.new('test-data-id')
    assert_equal('test_by_configure', stats_data.app_id)
  end

  test('env & configure & constructor') do
    ENV['ESTATJAPAN_APP_ID'] = 'test_by_env'
    Datasets::EStatJapan.configure do |config|
      config.app_id = 'test_by_configure'
    end
    stats_data = Datasets::EStatJapan::StatsData.new('test-data-id', app_id: 'test_by_constructor')
    assert_equal('test_by_constructor', stats_data.app_id)
  end
end
|
56
|
+
|
57
|
+
# Checks the request URL that StatsData builds for the getStatsData API.
sub_test_case('url generation') do
  def setup
    ENV['ESTATJAPAN_APP_ID'] = nil
    Datasets::EStatJapan.app_id = nil
  end

  # The test sets the module-level app_id; clear it again so the value
  # does not leak into tests that run afterwards.
  def teardown
    Datasets::EStatJapan.app_id = nil
  end

  test('generates url correctly') do
    Datasets::EStatJapan.app_id = 'abcdef'
    stats_data = Datasets::EStatJapan::StatsData.new('test-data-id')
    stats_data_id = '000000'
    # Override the id and endpoint so the expected URL is fully
    # determined by this test.
    stats_data.instance_eval do
      @id = stats_data_id
      @base_url = 'http://testurl/rest/2.1/app/json/getStatsData'
    end
    url = stats_data.send(:generate_url)
    # The last query parameter is "sectionHeaderFlg"; it must be joined
    # with a plain "&" (an HTML-decoded "&sect" would turn into "§").
    assert_equal(
      'http://testurl/rest/2.1/app/json/getStatsData' \
      '?appId=abcdef&lang=J&statsDataId=000000&' \
      'metaGetFlg=Y&cntGetFlg=N&sectionHeaderFlg=1',
      url.to_s
    )
  end
end
|
80
|
+
|
81
|
+
# Parses a canned getStatsData response (written to a temporary JSON
# file) under different StatsData options and checks the resulting
# record / area / time table / column / schema counts.
sub_test_case('parsing records') do
  def setup
    Datasets::EStatJapan.app_id = nil
    # prepare test data
    class_obj = [
      {
        "@name": 'table1',
        "@id": 'tab',
        "CLASS": {
          "@level": '1',
          "@code": '00001',
          "@name": 'table1'
        }
      },
      {
        "@name": 'data1',
        "@id": 'cat01',
        "CLASS": {
          "@level": '1',
          "@code": 'data1',
          "@name": 'data1_name'
        }
      },
      {
        "@name": 'area1',
        "@id": 'area',
        "CLASS": [
          {
            "@level": '2',
            "@code": '01100',
            "@name": 'test1 big-city',
            "@parentCode": '01000'
          },
          {
            "@level": '3',
            "@code": '01101',
            "@name": 'test1 big-city a-ku',
            "@parentCode": '01100'
          },
          {
            "@level": '3',
            "@code": '01102',
            "@name": 'test1 big-city b-ku',
            "@parentCode": '01100'
          },
          {
            "@level": '2',
            "@code": '02555',
            "@name": 'test2 a-city',
            "@parentCode": '02000'
          },
          {
            "@level": '2',
            "@code": '02556',
            "@name": 'test2 b-city',
            "@parentCode": '02000'
          }
        ]
      },
      {
        "@name": 'time',
        "@id": 'time',
        "CLASS": [
          {
            "@level": '1',
            "@code": 'time1',
            "@name": 'time1'
          },
          {
            "@level": '1',
            "@code": 'time2',
            "@name": 'time2'
          },
          {
            "@level": '1',
            "@code": 'time3',
            "@name": 'time3'
          }
        ]
      }
    ]
    # Every area gets a value for time1 and time2 ...
    data_inf = class_obj[2][:CLASS].flat_map do |entry|
      [
        stats_value(1000, entry[:@code], 'time1'),
        stats_value(2000, entry[:@code], 'time2')
      ]
    end
    # ... and only area 02556 also gets one for time3, so it is the only
    # row without a nil value (test record for `skip_nil_row: true`).
    data_inf << stats_value(3000, '02556', 'time3')

    @response_data_default = {
      'GET_STATS_DATA' => {
        'RESULT' => {
          'STATUS' => 0,
          'ERROR_MSG' => 'succeeded'
        },
        'STATISTICAL_DATA' => {
          'DATA_INF' => {
            'VALUE' => data_inf
          },
          'CLASS_INF' => {
            'CLASS_OBJ' => class_obj
          }
        }
      }
    }

    @tmp_dir = Dir.mktmpdir
    @test_data_path = Pathname(File.join(@tmp_dir, '200-ok.json'))
    # Remember the caller's value so teardown can restore it.
    @original_env_app_id = ENV['ESTATJAPAN_APP_ID']
    ENV['ESTATJAPAN_APP_ID'] = 'test_appid_correct'
    File.write(@test_data_path, @response_data_default.to_json)
  end

  def teardown
    # Removing the directory also removes the fixture file inside it.
    FileUtils.remove_entry_secure(@tmp_dir)
  ensure
    # Do not leak the fake app id into tests that run afterwards.
    ENV['ESTATJAPAN_APP_ID'] = @original_env_app_id
  end

  # Builds one entry of the response's DATA_INF/VALUE list.
  def stats_value(amount, area_code, time_code)
    {
      "$": amount,
      "@area": area_code,
      "@cat01": 'data1',
      "@tab": 'table1',
      "@time": time_code,
      "@unit": 'person'
    }
  end

  # Creates a StatsData with the given options and points its @data_path
  # at the fixture file written in setup.
  def stats_data_from_fixture(**options)
    stats_data = Datasets::EStatJapan::StatsData.new('test-data-id', **options)
    stats_data.instance_variable_set(:@data_path, @test_data_path)
    stats_data
  end

  # Iterates the dataset and returns every yielded record.
  def read_records(stats_data)
    records = []
    stats_data.each do |record|
      records << record
    end
    records
  end

  # Total number of values across all records.
  def count_values(records)
    records.map { |record| record.values.length }.inject(0, :+)
  end

  # Number of time tables that are not flagged with :skip.
  def active_time_table_count(stats_data)
    stats_data.time_tables.reject { |_k, v| v[:skip] }.to_h.length
  end

  test('parsing records with default option') do
    stats_data = stats_data_from_fixture(app_id: 'valid')
    records = read_records(stats_data)

    assert_equal(4, records.length)
    assert_equal(4 * 2, count_values(records))
    assert_equal(4, stats_data.areas.length)
    assert_equal(3, stats_data.time_tables.length)
    assert_equal(2, active_time_table_count(stats_data))
    assert_equal(1, stats_data.columns.length)
    assert_equal(2, stats_data.schema.length)
  end

  test('parsing records with hierarchy_selection') do
    # hierarchy_selection => expected number of areas (and records).
    expected_area_counts = {
      'parent' => 3,
      'child' => 4,
      'both' => 5
    }
    expected_area_counts.each do |selection, expected_count|
      label = "hierarchy_selection: #{selection}"
      stats_data = stats_data_from_fixture(hierarchy_selection: selection)
      records = read_records(stats_data)

      assert_equal(expected_count, records.length, label)
      assert_equal(expected_count, stats_data.areas.length, label)
      assert_equal(3, stats_data.time_tables.length, label)
      assert_equal(2, active_time_table_count(stats_data), label)
      assert_equal(1, stats_data.columns.length, label)
      assert_equal(2, stats_data.schema.length, label)
    end
  end

  test('parsing records with skip_nil_(column|row)') do
    # Keeping nil columns: time3 is no longer skipped, so every record
    # carries three values.
    stats_data = stats_data_from_fixture(skip_nil_column: false)
    records = read_records(stats_data)

    assert_equal(4, records.length)
    assert_equal(4 * 3, count_values(records))
    assert_equal(4, stats_data.areas.length)
    assert_equal(3, stats_data.time_tables.length)
    assert_equal(3, active_time_table_count(stats_data))
    assert_equal(1, stats_data.columns.length)
    assert_equal(3, stats_data.schema.length)

    # Additionally dropping rows that contain nil leaves a single record.
    stats_data = stats_data_from_fixture(skip_nil_row: true,
                                         skip_nil_column: false)
    records = read_records(stats_data)

    assert_equal(1, records.length)
    assert_equal(1 * 3, count_values(records))
    assert_equal(4, stats_data.areas.length)
    assert_equal(3, stats_data.time_tables.length)
    assert_equal(3, active_time_table_count(stats_data))
    assert_equal(1, stats_data.columns.length)
    assert_equal(3, stats_data.schema.length)
  end
end
|
341
|
+
|
342
|
+
# Behaviour when the cached API response reports an error status.
sub_test_case('anomaly responses') do
  def setup
    # Remember the caller's value so teardown can restore it.
    @original_env_app_id = ENV['ESTATJAPAN_APP_ID']
    ENV['ESTATJAPAN_APP_ID'] = nil
    Datasets::EStatJapan.app_id = nil
    @response_data = {
      'GET_STATS_DATA' => {
        'RESULT' => {
          'STATUS' => 100,
          'ERROR_MSG' => 'error message'
        }
      }
    }
    @tmp_dir = Dir.mktmpdir
    @test_data_path = Pathname(File.join(@tmp_dir, '200-error.json'))
    File.open(@test_data_path, 'w') do |f|
      f.write(@response_data.to_json)
    end
  end

  def teardown
    FileUtils.remove_entry_secure(@tmp_dir)
  ensure
    # The test sets a fake app id in ENV; do not leak it to later tests.
    ENV['ESTATJAPAN_APP_ID'] = @original_env_app_id
  end

  test('forbidden access with invalid app_id') do
    ENV['ESTATJAPAN_APP_ID'] = 'test_appid_invalid'
    stats_data = Datasets::EStatJapan::StatsData.new('test-data-id')
    # Point the dataset at the canned error response.
    stats_data.instance_variable_set(:@data_path, @test_data_path)

    assert_raise(Datasets::EStatJapan::APIError) do
      # contains no data
      stats_data.each { |_record| }
    end
    # ensure remove error response cache
    # (assert_equal takes the expected value first, then the actual one)
    assert_equal(false, @test_data_path.exist?)
  end
end
|
383
|
+
end
|