red-datasets 0.0.8 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
data/red-datasets.gemspec CHANGED
@@ -35,6 +35,7 @@ Gem::Specification.new do |spec|
35
35
  spec.test_files += Dir.glob("test/**/*")
36
36
 
37
37
  spec.add_runtime_dependency("csv", ">= 3.0.5")
38
+ spec.add_runtime_dependency("rexml")
38
39
  spec.add_runtime_dependency("rubyzip")
39
40
 
40
41
  spec.add_development_dependency("bundler")
data/test/run-test.rb CHANGED
@@ -13,4 +13,6 @@ $LOAD_PATH.unshift(lib_dir.to_s)
13
13
 
14
14
  require_relative "helper"
15
15
 
16
+ ARGV.unshift("--max-diff-target-string-size=#{10 * 1024}")
17
+
16
18
  exit(Test::Unit::AutoRunner.run(true, test_dir.to_s))
@@ -0,0 +1,180 @@
1
+ class CLDRPluralsTest < Test::Unit::TestCase
2
+ def setup
3
+ @dataset = Datasets::CLDRPlurals.new
4
+ end
5
+
6
+ def locale(*args)
7
+ Datasets::CLDRPlurals::Locale.new(*args)
8
+ end
9
+
10
+ def rule(*args)
11
+ Datasets::CLDRPlurals::Rule.new(*args)
12
+ end
13
+
14
+ test("#each") do
15
+ locales = @dataset.each.to_a
16
+ assert_equal([
17
+ 218,
18
+ locale("bm",
19
+ [
20
+ rule("other",
21
+ nil,
22
+ [
23
+ 0..15,
24
+ 100,
25
+ 1000,
26
+ 10000,
27
+ 100000,
28
+ 1000000,
29
+ :elipsis,
30
+ ],
31
+ [
32
+ 0.0..1.5,
33
+ 10.0,
34
+ 100.0,
35
+ 1000.0,
36
+ 10000.0,
37
+ 100000.0,
38
+ 1000000.0,
39
+ :elipsis,
40
+ ])
41
+ ]),
42
+ locale("kw",
43
+ [
44
+ rule("zero",
45
+ [:equal, "n", [0]],
46
+ [0],
47
+ [0.0, 0.00, 0.000, 0.0000]),
48
+ rule("one",
49
+ [:equal, "n", [1]],
50
+ [1],
51
+ [1.0, 1.00, 1.000, 1.0000]),
52
+ rule("two",
53
+ [:or,
54
+ [:equal,
55
+ [:mod, "n", 100],
56
+ [2, 22, 42, 62, 82]],
57
+ [:and,
58
+ [:equal, [:mod, "n", 1000], [0]],
59
+ [:equal,
60
+ [:mod, "n", 100000],
61
+ [1000..20000, 40000, 60000, 80000]]],
62
+ [:and,
63
+ [:not_equal, "n", [0]],
64
+ [:equal, [:mod, "n", 1000000], [100000]]]],
65
+ [
66
+ 2,
67
+ 22,
68
+ 42,
69
+ 62,
70
+ 82,
71
+ 102,
72
+ 122,
73
+ 142,
74
+ 1000,
75
+ 10000,
76
+ 100000,
77
+ :elipsis,
78
+ ],
79
+ [
80
+ 2.0,
81
+ 22.0,
82
+ 42.0,
83
+ 62.0,
84
+ 82.0,
85
+ 102.0,
86
+ 122.0,
87
+ 142.0,
88
+ 1000.0,
89
+ 10000.0,
90
+ 100000.0,
91
+ :elipsis,
92
+ ]),
93
+ rule("few",
94
+ [:equal,
95
+ [:mod, "n", 100],
96
+ [3, 23, 43, 63, 83]],
97
+ [
98
+ 3,
99
+ 23,
100
+ 43,
101
+ 63,
102
+ 83,
103
+ 103,
104
+ 123,
105
+ 143,
106
+ 1003,
107
+ :elipsis,
108
+ ],
109
+ [
110
+ 3.0,
111
+ 23.0,
112
+ 43.0,
113
+ 63.0,
114
+ 83.0,
115
+ 103.0,
116
+ 123.0,
117
+ 143.0,
118
+ 1003.0,
119
+ :elipsis,
120
+ ]),
121
+ rule("many",
122
+ [:and,
123
+ [:not_equal, "n", [1]],
124
+ [:equal,
125
+ [:mod, "n", 100],
126
+ [1, 21, 41, 61, 81]]],
127
+ [
128
+ 21,
129
+ 41,
130
+ 61,
131
+ 81,
132
+ 101,
133
+ 121,
134
+ 141,
135
+ 161,
136
+ 1001,
137
+ :elipsis,
138
+ ],
139
+ [
140
+ 21.0,
141
+ 41.0,
142
+ 61.0,
143
+ 81.0,
144
+ 101.0,
145
+ 121.0,
146
+ 141.0,
147
+ 161.0,
148
+ 1001.0,
149
+ :elipsis,
150
+ ]),
151
+ rule("other",
152
+ nil,
153
+ [4..19, 100, 1004, 1000000, :elipsis],
154
+ [
155
+ 0.1..0.9,
156
+ 1.1..1.7,
157
+ 10.0,
158
+ 100.0,
159
+ 1000.1,
160
+ 1000000.0,
161
+ :elipsis,
162
+ ]),
163
+ ]),
164
+ ],
165
+ [
166
+ locales.size,
167
+ locales[0],
168
+ locales[-4],
169
+ ])
170
+ end
171
+
172
+ sub_test_case("#metadata") do
173
+ test("#description") do
174
+ description = @dataset.metadata.description
175
+ assert do
176
+ description.start_with?("Language plural rules in Unicode Common Locale Data Repository.")
177
+ end
178
+ end
179
+ end
180
+ end
@@ -0,0 +1,290 @@
1
+ class CommunitiesTest < Test::Unit::TestCase
2
+ def setup
3
+ @dataset = Datasets::Communities.new
4
+ end
5
+
6
+ def record(*args)
7
+ Datasets::Communities::Record.new(*args)
8
+ end
9
+
10
+ test('#each') do
11
+ records = @dataset.each.to_a
12
+ assert_equal([
13
+ 1994,
14
+ {
15
+ :state => 8,
16
+ :county => nil,
17
+ :community => nil,
18
+ :community_name => "Lakewoodcity",
19
+ :fold => 1,
20
+ :population => 0.19,
21
+ :household_size => 0.33,
22
+ :race_percent_black => 0.02,
23
+ :race_percent_white => 0.9,
24
+ :race_percent_asian => 0.12,
25
+ :race_percent_hispanic => 0.17,
26
+ :age_percent_12_to_21 => 0.34,
27
+ :age_percent_12_to_29 => 0.47,
28
+ :age_percent_16_to_24 => 0.29,
29
+ :age_percent_65_and_upper => 0.32,
30
+ :n_people_urban => 0.2,
31
+ :percent_people_urban => 1,
32
+ :median_income => 0.37,
33
+ :percent_households_with_wage => 0.72,
34
+ :percent_households_with_farm_self => 0.34,
35
+ :percent_households_with_investment_income => 0.6,
36
+ :percent_households_with_social_security => 0.29,
37
+ :percent_households_with_public_assistant => 0.15,
38
+ :percent_households_with_retire => 0.43,
39
+ :median_family_income => 0.39,
40
+ :per_capita_income => 0.4,
41
+ :per_capita_income_white => 0.39,
42
+ :per_capita_income_black => 0.32,
43
+ :per_capita_income_indian => 0.27,
44
+ :per_capita_income_asian => 0.27,
45
+ :per_capita_income_other => 0.36,
46
+ :per_capita_income_hispanic => 0.41,
47
+ :n_people_under_poverty => 0.08,
48
+ :percent_people_under_poverty => 0.19,
49
+ :percent_less_9th_grade => 0.1,
50
+ :percent_not_high_school_graduate => 0.18,
51
+ :percent_bachelors_or_more => 0.48,
52
+ :percent_unemployed => 0.27,
53
+ :percent_employed => 0.68,
54
+ :percent_employed_manufacturing => 0.23,
55
+ :percent_employed_professional_service => 0.41,
56
+ :percent_occupations_manufacturing => 0.25,
57
+ :percent_occupations_management_professional => 0.52,
58
+ :male_percent_divorced => 0.68,
59
+ :male_percent_never_married => 0.4,
60
+ :female_percent_divorced => 0.75,
61
+ :total_percent_divorced => 0.75,
62
+ :mean_persons_per_family => 0.35,
63
+ :percent_family_2_parents => 0.55,
64
+ :percent_kids_2_parents => 0.59,
65
+ :percent_young_kids_2_parents => 0.61,
66
+ :percent_teen_2_parents => 0.56,
67
+ :percent_work_mom_young_kids => 0.74,
68
+ :percent_work_mom => 0.76,
69
+ :n_illegals => 0.04,
70
+ :percent_illegals => 0.14,
71
+ :n_immigrants => 0.03,
72
+ :percent_immigrants_recent => 0.24,
73
+ :percent_immigrants_recent_5 => 0.27,
74
+ :percent_immigrants_recent_8 => 0.37,
75
+ :percent_immigrants_recent_10 => 0.39,
76
+ :percent_population_immigranted_recent => 0.07,
77
+ :percent_population_immigranted_recent_5 => 0.07,
78
+ :percent_population_immigranted_recent_8 => 0.08,
79
+ :percent_population_immigranted_recent_10 => 0.08,
80
+ :percent_speak_english_only => 0.89,
81
+ :percent_not_speak_english_well => 0.06,
82
+ :percent_large_households_family => 0.14,
83
+ :percent_large_households_occupied => 0.13,
84
+ :mean_persons_per_occupied_household => 0.33,
85
+ :mean_persons_per_owner_occupied_household => 0.39,
86
+ :mean_persons_per_rental_occupied_household => 0.28,
87
+ :percent_persons_owner_occupied_household => 0.55,
88
+ :percent_persons_dense_housing => 0.09,
89
+ :percent_housing_less_3_bedrooms => 0.51,
90
+ :median_n_bedrooms => 0.5,
91
+ :n_vacant_households => 0.21,
92
+ :percent_housing_occupied => 0.71,
93
+ :percent_housing_owner_occupied => 0.52,
94
+ :percent_vacant_housing_boarded => 0.05,
95
+ :percent_vacant_housing_more_6_months => 0.26,
96
+ :median_year_housing_built => 0.65,
97
+ :percent_housing_no_phone => 0.14,
98
+ :percent_housing_without_full_plumbing => 0.06,
99
+ :owner_occupied_housing_lower_quartile => 0.22,
100
+ :owner_occupied_housing_median => 0.19,
101
+ :owner_occupied_housing_higher_quartile => 0.18,
102
+ :rental_housing_lower_quartile => 0.36,
103
+ :rental_housing_median => 0.35,
104
+ :rental_housing_higher_quartile => 0.38,
105
+ :median_rent => 0.34,
106
+ :median_rent_percent_household_income => 0.38,
107
+ :median_owner_cost_percent_household_income => 0.46,
108
+ :median_owner_cost_percent_household_income_no_mortgage => 0.25,
109
+ :n_people_shelter => 0.04,
110
+ :n_people_street => 0,
111
+ :percent_foreign_born => 0.12,
112
+ :percent_born_same_state => 0.42,
113
+ :percent_same_house_85 => 0.5,
114
+ :percent_same_city_85 => 0.51,
115
+ :percent_same_state_85 => 0.64,
116
+ :lemas_sworn_full_time => 0.03,
117
+ :lemas_sworn_full_time_per_population => 0.13,
118
+ :lemas_sworn_full_time_field => 0.96,
119
+ :lemas_sworn_full_time_field_per_population => 0.17,
120
+ :lemas_total_requests => 0.06,
121
+ :lemas_total_requests_per_population => 0.18,
122
+ :total_requests_per_officer => 0.44,
123
+ :n_officers_per_population => 0.13,
124
+ :racial_match_community_police => 0.94,
125
+ :percent_police_white => 0.93,
126
+ :percent_police_black => 0.03,
127
+ :percent_police_hispanic => 0.07,
128
+ :percent_police_asian => 0.1,
129
+ :percent_police_minority => 0.07,
130
+ :n_officers_assigned_drug_units => 0.02,
131
+ :n_kinds_drugs_seized => 0.57,
132
+ :police_average_overtime_worked => 0.29,
133
+ :land_area => 0.12,
134
+ :population_density => 0.26,
135
+ :percent_use_public_transit => 0.2,
136
+ :n_police_cars => 0.06,
137
+ :n_police_operating_budget => 0.04,
138
+ :lemas_percent_police_on_patrol => 0.9,
139
+ :lemas_gang_unit_deployed => 0.5,
140
+ :lemas_percent_office_drug_units => 0.32,
141
+ :police_operating_budget_per_population => 0.14,
142
+ :total_violent_crimes_per_population => 0.2,
143
+ },
144
+ {
145
+ :state => 6,
146
+ :county => nil,
147
+ :community => nil,
148
+ :community_name => "Ontariocity",
149
+ :fold => 10,
150
+ :population => 0.2,
151
+ :household_size => 0.78,
152
+ :race_percent_black => 0.14,
153
+ :race_percent_white => 0.46,
154
+ :race_percent_asian => 0.24,
155
+ :race_percent_hispanic => 0.77,
156
+ :age_percent_12_to_21 => 0.5,
157
+ :age_percent_12_to_29 => 0.62,
158
+ :age_percent_16_to_24 => 0.4,
159
+ :age_percent_65_and_upper => 0.17,
160
+ :n_people_urban => 0.21,
161
+ :percent_people_urban => 1,
162
+ :median_income => 0.4,
163
+ :percent_households_with_wage => 0.73,
164
+ :percent_households_with_farm_self => 0.22,
165
+ :percent_households_with_investment_income => 0.25,
166
+ :percent_households_with_social_security => 0.26,
167
+ :percent_households_with_public_assistant => 0.47,
168
+ :percent_households_with_retire => 0.29,
169
+ :median_family_income => 0.36,
170
+ :per_capita_income => 0.24,
171
+ :per_capita_income_white => 0.28,
172
+ :per_capita_income_black => 0.32,
173
+ :per_capita_income_indian => 0.22,
174
+ :per_capita_income_asian => 0.27,
175
+ :per_capita_income_other => 0.25,
176
+ :per_capita_income_hispanic => 0.29,
177
+ :n_people_under_poverty => 0.16,
178
+ :percent_people_under_poverty => 0.35,
179
+ :percent_less_9th_grade => 0.5,
180
+ :percent_not_high_school_graduate => 0.55,
181
+ :percent_bachelors_or_more => 0.16,
182
+ :percent_unemployed => 0.47,
183
+ :percent_employed => 0.58,
184
+ :percent_employed_manufacturing => 0.53,
185
+ :percent_employed_professional_service => 0.2,
186
+ :percent_occupations_manufacturing => 0.6,
187
+ :percent_occupations_management_professional => 0.24,
188
+ :male_percent_divorced => 0.49,
189
+ :male_percent_never_married => 0.5,
190
+ :female_percent_divorced => 0.6,
191
+ :total_percent_divorced => 0.57,
192
+ :mean_persons_per_family => 0.86,
193
+ :percent_family_2_parents => 0.61,
194
+ :percent_kids_2_parents => 0.59,
195
+ :percent_young_kids_2_parents => 0.64,
196
+ :percent_teen_2_parents => 0.6,
197
+ :percent_work_mom_young_kids => 0.35,
198
+ :percent_work_mom => 0.35,
199
+ :n_illegals => 0.11,
200
+ :percent_illegals => 0.43,
201
+ :n_immigrants => 0.2,
202
+ :percent_immigrants_recent => 0.43,
203
+ :percent_immigrants_recent_5 => 0.52,
204
+ :percent_immigrants_recent_8 => 0.58,
205
+ :percent_immigrants_recent_10 => 0.65,
206
+ :percent_population_immigranted_recent => 0.68,
207
+ :percent_population_immigranted_recent_5 => 0.73,
208
+ :percent_population_immigranted_recent_8 => 0.73,
209
+ :percent_population_immigranted_recent_10 => 0.75,
210
+ :percent_speak_english_only => 0.35,
211
+ :percent_not_speak_english_well => 0.77,
212
+ :percent_large_households_family => 0.84,
213
+ :percent_large_households_occupied => 0.83,
214
+ :mean_persons_per_occupied_household => 0.84,
215
+ :mean_persons_per_owner_occupied_household => 0.78,
216
+ :mean_persons_per_rental_occupied_household => 0.89,
217
+ :percent_persons_owner_occupied_household => 0.46,
218
+ :percent_persons_dense_housing => 0.76,
219
+ :percent_housing_less_3_bedrooms => 0.55,
220
+ :median_n_bedrooms => 0,
221
+ :n_vacant_households => 0.12,
222
+ :percent_housing_occupied => 0.8,
223
+ :percent_housing_owner_occupied => 0.49,
224
+ :percent_vacant_housing_boarded => 0.1,
225
+ :percent_vacant_housing_more_6_months => 0.16,
226
+ :median_year_housing_built => 0.71,
227
+ :percent_housing_no_phone => 0.32,
228
+ :percent_housing_without_full_plumbing => 0.21,
229
+ :owner_occupied_housing_lower_quartile => 0.35,
230
+ :owner_occupied_housing_median => 0.35,
231
+ :owner_occupied_housing_higher_quartile => 0.32,
232
+ :rental_housing_lower_quartile => 0.54,
233
+ :rental_housing_median => 0.53,
234
+ :rental_housing_higher_quartile => 0.61,
235
+ :median_rent => 0.54,
236
+ :median_rent_percent_household_income => 0.69,
237
+ :median_owner_cost_percent_household_income => 0.73,
238
+ :median_owner_cost_percent_household_income_no_mortgage => 0.21,
239
+ :n_people_shelter => 0.08,
240
+ :n_people_street => 0.08,
241
+ :percent_foreign_born => 0.68,
242
+ :percent_born_same_state => 0.5,
243
+ :percent_same_house_85 => 0.34,
244
+ :percent_same_city_85 => 0.35,
245
+ :percent_same_state_85 => 0.68,
246
+ :lemas_sworn_full_time => 0.03,
247
+ :lemas_sworn_full_time_per_population => 0.11,
248
+ :lemas_sworn_full_time_field => 0.96,
249
+ :lemas_sworn_full_time_field_per_population => 0.14,
250
+ :lemas_total_requests => 0.04,
251
+ :lemas_total_requests_per_population => 0.11,
252
+ :total_requests_per_officer => 0.3,
253
+ :n_officers_per_population => 0.11,
254
+ :racial_match_community_police => 0.45,
255
+ :percent_police_white => 0.74,
256
+ :percent_police_black => 0.34,
257
+ :percent_police_hispanic => 0.07,
258
+ :percent_police_asian => 0,
259
+ :percent_police_minority => 0.28,
260
+ :n_officers_assigned_drug_units => 0.07,
261
+ :n_kinds_drugs_seized => 0.36,
262
+ :police_average_overtime_worked => 0.11,
263
+ :land_area => 0.11,
264
+ :population_density => 0.3,
265
+ :percent_use_public_transit => 0.05,
266
+ :n_police_cars => 0.08,
267
+ :n_police_operating_budget => 0.04,
268
+ :lemas_percent_police_on_patrol => 0.73,
269
+ :lemas_gang_unit_deployed => 0.5,
270
+ :lemas_percent_office_drug_units => 1,
271
+ :police_operating_budget_per_population => 0.13,
272
+ :total_violent_crimes_per_population => 0.48
273
+ },
274
+ ],
275
+ [
276
+ records.size,
277
+ records[0].to_h,
278
+ records[-1].to_h
279
+ ])
280
+ end
281
+
282
+ sub_test_case("#metadata") do
283
+ test("#description") do
284
+ description = @dataset.metadata.description
285
+ assert do
286
+ description.start_with?("Title: Communities and Crime")
287
+ end
288
+ end
289
+ end
290
+ end