red-datasets 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +6 -0
- data/doc/text/news.md +25 -0
- data/lib/datasets.rb +4 -0
- data/lib/datasets/cldr-plurals.rb +385 -0
- data/lib/datasets/communities.rb +198 -0
- data/lib/datasets/dataset.rb +1 -0
- data/lib/datasets/e-stat-japan.rb +320 -0
- data/lib/datasets/error.rb +4 -0
- data/lib/datasets/mnist.rb +0 -2
- data/lib/datasets/penguins.rb +125 -0
- data/lib/datasets/version.rb +1 -1
- data/red-datasets.gemspec +1 -0
- data/test/run-test.rb +2 -0
- data/test/test-cldr-plurals.rb +180 -0
- data/test/test-communities.rb +290 -0
- data/test/test-e-stat-japan.rb +383 -0
- data/test/test-penguins.rb +239 -0
- metadata +41 -15
data/lib/datasets/mnist.rb
CHANGED
data/lib/datasets/penguins.rb
ADDED
@@ -0,0 +1,125 @@
|
|
1
|
+
require "csv"

require_relative "dataset"
|
2
|
+
|
3
|
+
module Datasets
|
4
|
+
module PenguinsRawData
  # One row of a raw per-species CSV file.
  #
  # SpeciesBase#each fills the members positionally from the fields of each
  # CSV row, so the member order here has to follow the column order of the
  # downloaded files.
  Record = Struct.new(
    :study_name,
    :sample_number,
    :species,
    :region,
    :island,
    :stage,
    :individual_id,
    :clutch_completion,
    :date_egg,
    :culmen_length_mm,
    :culmen_depth_mm,
    :flipper_length_mm,
    :body_mass_g,
    :sex,
    :delta_15_n_permil,
    :delta_13_c_permil,
    :comments
  )

  # Shared implementation of the per-species raw datasets.
  #
  # A subclass only has to define a URL constant. The dataset id and the
  # name of the cached CSV file are derived from the subclass name.
  class SpeciesBase < Dataset
    def initialize
      super
      # Last namespace component, lower-cased
      # (e.g. "Datasets::PenguinsRawData::Adelie" -> "adelie").
      species = self.class.name.split("::").last.downcase
      @metadata.id = "palmerpenguins-raw-#{species}"
      @metadata.url = self.class::URL
      @metadata.licenses = ["CC0"]
      @data_path = cache_dir_path + "penguins" + "#{species}.csv"
    end

    # Location of the cached CSV file for this species.
    attr_reader :data_path

    # Yields one Record per CSV row. Returns an Enumerator when no block is
    # given.
    def each
      return to_enum(__method__) unless block_given?

      open_data do |csv|
        csv.each do |row|
          # Rows whose first field is nil carry no record; skip them.
          next if row[0].nil?

          yield Record.new(*row.fields)
        end
      end
    end

    # Opens the cached CSV, downloading it first when the file does not
    # exist yet, and yields the CSV reader. The first row is treated as the
    # header row and all built-in converters are applied to the fields.
    #
    # CSV comes from the "csv" library, which this file has to require.
    private def open_data
      download unless data_path.exist?
      CSV.open(data_path, headers: :first_row, converters: :all) do |csv|
        yield csv
      end
    end

    # Fetches metadata.url into data_path through the inherited two-argument
    # download method.
    private def download
      super(data_path, metadata.url)
    end
  end

  # Adelie penguin data from: https://doi.org/10.6073/pasta/abc50eed9138b75f54eaada0841b9b86
  class Adelie < SpeciesBase
    DOI = "doi.org/10.6073/pasta/abc50eed9138b75f54eaada0841b9b86".freeze
    URL = "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-pal.219.3&entityid=002f3893385f710df69eeebe893144ff".freeze
  end

  # Gentoo penguin data from: https://doi.org/10.6073/pasta/2b1cff60f81640f182433d23e68541ce
  class Gentoo < SpeciesBase
    DOI = "doi.org/10.6073/pasta/2b1cff60f81640f182433d23e68541ce".freeze
    URL = "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-pal.220.3&entityid=e03b43c924f226486f2f0ab6709d2381".freeze
  end

  # Chinstrap penguin data from: https://doi.org/10.6073/pasta/409c808f8fc9899d02401bdb04580af7
  class Chinstrap < SpeciesBase
    DOI = "doi.org/10.6073/pasta/409c808f8fc9899d02401bdb04580af7".freeze
    URL = "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-pal.221.2&entityid=fe853aa8f7a59aa84cdd3197619ef462".freeze
  end
end
|
77
|
+
|
78
|
+
# This dataset provides the same dataset as https://github.com/allisonhorst/palmerpenguins
|
79
|
+
class Penguins < Dataset
  # Simplified record yielded by #each; built from a raw per-species record
  # by #convert_record.
  Record = Struct.new(
    :species,
    :island,
    :bill_length_mm,
    :bill_depth_mm,
    :flipper_length_mm,
    :body_mass_g,
    :sex,
    :year
  )

  def initialize
    super
    @metadata.id = "palmerpenguins"
    @metadata.name = "palmerpenguins"
    @metadata.url = "https://allisonhorst.github.io/palmerpenguins/"
    @metadata.licenses = ["CC0"]
    @metadata.description = "A great dataset for data exploration & visualization, as an alternative to iris"
  end

  # Yields a converted Record for every raw record, going through the
  # Adelie, Gentoo and Chinstrap raw datasets in that order. Returns an
  # Enumerator when no block is given.
  def each(&block)
    return to_enum(__method__) unless block_given?

    [
      PenguinsRawData::Adelie,
      PenguinsRawData::Gentoo,
      PenguinsRawData::Chinstrap
    ].each do |raw_dataset_class|
      raw_dataset_class.new.each do |raw|
        yield convert_record(raw)
      end
    end
  end

  # Maps a PenguinsRawData::Record onto the simplified Record:
  # the species is cut down to its first word, the culmen measurements are
  # exposed under the bill_* members, flipper length and body mass are
  # turned into integers, sex is lower-cased and only the year of the egg
  # date is kept. Missing (nil) values stay nil.
  private def convert_record(raw_record)
    short_species = raw_record.species.split(' ')[0]
    flipper_length = raw_record.flipper_length_mm&.to_i
    body_mass = raw_record.body_mass_g&.to_i
    sex = raw_record.sex&.downcase
    year = raw_record.date_egg&.year

    Record.new(short_species,
               raw_record.island,
               raw_record.culmen_length_mm,
               raw_record.culmen_depth_mm,
               flipper_length,
               body_mass,
               sex,
               year)
  end
end
|
125
|
+
end
|
data/lib/datasets/version.rb
CHANGED
data/red-datasets.gemspec
CHANGED
data/test/run-test.rb
CHANGED
data/test/test-cldr-plurals.rb
ADDED
@@ -0,0 +1,180 @@
|
|
1
|
+
# Checks the locales produced by Datasets::CLDRPlurals and its metadata.
class CLDRPluralsTest < Test::Unit::TestCase
  def setup
    @dataset = Datasets::CLDRPlurals.new
  end

  # Shorthand for building an expected Locale.
  def locale(*args)
    Datasets::CLDRPlurals::Locale.new(*args)
  end

  # Shorthand for building an expected Rule.
  def rule(*args)
    Datasets::CLDRPlurals::Rule.new(*args)
  end

  test("#each") do
    locales = @dataset.each.to_a

    # Expected first locale: a single catch-all rule.
    expected_bm = locale(
      "bm",
      [
        rule("other",
             nil,
             [0..15, 100, 1000, 10000, 100000, 1000000, :elipsis],
             [0.0..1.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, :elipsis]),
      ]
    )

    # Expected rules of the fourth locale from the end.
    kw_zero = rule("zero",
                   [:equal, "n", [0]],
                   [0],
                   [0.0, 0.00, 0.000, 0.0000])
    kw_one = rule("one",
                  [:equal, "n", [1]],
                  [1],
                  [1.0, 1.00, 1.000, 1.0000])
    kw_two = rule(
      "two",
      [:or,
       [:equal,
        [:mod, "n", 100],
        [2, 22, 42, 62, 82]],
       [:and,
        [:equal, [:mod, "n", 1000], [0]],
        [:equal,
         [:mod, "n", 100000],
         [1000..20000, 40000, 60000, 80000]]],
       [:and,
        [:not_equal, "n", [0]],
        [:equal, [:mod, "n", 1000000], [100000]]]],
      [2, 22, 42, 62, 82, 102, 122, 142, 1000, 10000, 100000, :elipsis],
      [2.0, 22.0, 42.0, 62.0, 82.0, 102.0, 122.0, 142.0, 1000.0, 10000.0, 100000.0, :elipsis]
    )
    kw_few = rule(
      "few",
      [:equal,
       [:mod, "n", 100],
       [3, 23, 43, 63, 83]],
      [3, 23, 43, 63, 83, 103, 123, 143, 1003, :elipsis],
      [3.0, 23.0, 43.0, 63.0, 83.0, 103.0, 123.0, 143.0, 1003.0, :elipsis]
    )
    kw_many = rule(
      "many",
      [:and,
       [:not_equal, "n", [1]],
       [:equal,
        [:mod, "n", 100],
        [1, 21, 41, 61, 81]]],
      [21, 41, 61, 81, 101, 121, 141, 161, 1001, :elipsis],
      [21.0, 41.0, 61.0, 81.0, 101.0, 121.0, 141.0, 161.0, 1001.0, :elipsis]
    )
    kw_other = rule(
      "other",
      nil,
      [4..19, 100, 1004, 1000000, :elipsis],
      [0.1..0.9, 1.1..1.7, 10.0, 100.0, 1000.1, 1000000.0, :elipsis]
    )
    expected_kw = locale("kw", [kw_zero, kw_one, kw_two, kw_few, kw_many, kw_other])

    expected = [215, expected_bm, expected_kw]
    actual = [locales.size, locales[0], locales[-4]]
    assert_equal(expected, actual)
  end

  sub_test_case("#metadata") do
    test("#description") do
      description = @dataset.metadata.description
      expected_prefix = "Language plural rules in Unicode Common Locale Data Repository."
      assert do
        description.start_with?(expected_prefix)
      end
    end
  end
end
|
data/test/test-communities.rb
ADDED
@@ -0,0 +1,290 @@
|
|
1
|
+
# Checks the records produced by Datasets::Communities and its metadata.
class CommunitiesTest < Test::Unit::TestCase
  def setup
    @dataset = Datasets::Communities.new
  end

  # Shorthand for building a Communities record.
  def record(*args)
    Datasets::Communities::Record.new(*args)
  end

  test('#each') do
    records = @dataset.each.to_a

    # Expected contents of the first record.
    expected_first = {
      state: 8,
      county: nil,
      community: nil,
      community_name: "Lakewoodcity",
      fold: 1,
      population: 0.19,
      household_size: 0.33,
      race_percent_black: 0.02,
      race_percent_white: 0.9,
      race_percent_asian: 0.12,
      race_percent_hispanic: 0.17,
      age_percent_12_to_21: 0.34,
      age_percent_12_to_29: 0.47,
      age_percent_16_to_24: 0.29,
      age_percent_65_and_upper: 0.32,
      n_people_urban: 0.2,
      percent_people_urban: 1,
      median_income: 0.37,
      percent_households_with_wage: 0.72,
      percent_households_with_farm_self: 0.34,
      percent_households_with_investment_income: 0.6,
      percent_households_with_social_security: 0.29,
      percent_households_with_public_assistant: 0.15,
      percent_households_with_retire: 0.43,
      median_family_income: 0.39,
      per_capita_income: 0.4,
      per_capita_income_white: 0.39,
      per_capita_income_black: 0.32,
      per_capita_income_indian: 0.27,
      per_capita_income_asian: 0.27,
      per_capita_income_other: 0.36,
      per_capita_income_hispanic: 0.41,
      n_people_under_poverty: 0.08,
      percent_people_under_poverty: 0.19,
      percent_less_9th_grade: 0.1,
      percent_not_high_school_graduate: 0.18,
      percent_bachelors_or_more: 0.48,
      percent_unemployed: 0.27,
      percent_employed: 0.68,
      percent_employed_manufacturing: 0.23,
      percent_employed_professional_service: 0.41,
      percent_occupations_manufacturing: 0.25,
      percent_occupations_management_professional: 0.52,
      male_percent_divorced: 0.68,
      male_percent_never_married: 0.4,
      female_percent_divorced: 0.75,
      total_percent_divorced: 0.75,
      mean_persons_per_family: 0.35,
      percent_family_2_parents: 0.55,
      percent_kids_2_parents: 0.59,
      percent_young_kids_2_parents: 0.61,
      percent_teen_2_parents: 0.56,
      percent_work_mom_young_kids: 0.74,
      percent_work_mom: 0.76,
      n_illegals: 0.04,
      percent_illegals: 0.14,
      n_immigrants: 0.03,
      percent_immigrants_recent: 0.24,
      percent_immigrants_recent_5: 0.27,
      percent_immigrants_recent_8: 0.37,
      percent_immigrants_recent_10: 0.39,
      percent_population_immigranted_recent: 0.07,
      percent_population_immigranted_recent_5: 0.07,
      percent_population_immigranted_recent_8: 0.08,
      percent_population_immigranted_recent_10: 0.08,
      percent_speak_english_only: 0.89,
      percent_not_speak_english_well: 0.06,
      percent_large_households_family: 0.14,
      percent_large_households_occupied: 0.13,
      mean_persons_per_occupied_household: 0.33,
      mean_persons_per_owner_occupied_household: 0.39,
      mean_persons_per_rental_occupied_household: 0.28,
      percent_persons_owner_occupied_household: 0.55,
      percent_persons_dense_housing: 0.09,
      percent_housing_less_3_bedrooms: 0.51,
      median_n_bedrooms: 0.5,
      n_vacant_households: 0.21,
      percent_housing_occupied: 0.71,
      percent_housing_owner_occupied: 0.52,
      percent_vacant_housing_boarded: 0.05,
      percent_vacant_housing_more_6_months: 0.26,
      median_year_housing_built: 0.65,
      percent_housing_no_phone: 0.14,
      percent_housing_without_full_plumbing: 0.06,
      owner_occupied_housing_lower_quartile: 0.22,
      owner_occupied_housing_median: 0.19,
      owner_occupied_housing_higher_quartile: 0.18,
      rental_housing_lower_quartile: 0.36,
      rental_housing_median: 0.35,
      rental_housing_higher_quartile: 0.38,
      median_rent: 0.34,
      median_rent_percent_household_income: 0.38,
      median_owner_cost_percent_household_income: 0.46,
      median_owner_cost_percent_household_income_no_mortgage: 0.25,
      n_people_shelter: 0.04,
      n_people_street: 0,
      percent_foreign_born: 0.12,
      percent_born_same_state: 0.42,
      percent_same_house_85: 0.5,
      percent_same_city_85: 0.51,
      percent_same_state_85: 0.64,
      lemas_sworn_full_time: 0.03,
      lemas_sworn_full_time_per_population: 0.13,
      lemas_sworn_full_time_field: 0.96,
      lemas_sworn_full_time_field_per_population: 0.17,
      lemas_total_requests: 0.06,
      lemas_total_requests_per_population: 0.18,
      total_requests_per_officer: 0.44,
      n_officers_per_population: 0.13,
      racial_match_community_police: 0.94,
      percent_police_white: 0.93,
      percent_police_black: 0.03,
      percent_police_hispanic: 0.07,
      percent_police_asian: 0.1,
      percent_police_minority: 0.07,
      n_officers_assigned_drug_units: 0.02,
      n_kinds_drugs_seized: 0.57,
      police_average_overtime_worked: 0.29,
      land_area: 0.12,
      population_density: 0.26,
      percent_use_public_transit: 0.2,
      n_police_cars: 0.06,
      n_police_operating_budget: 0.04,
      lemas_percent_police_on_patrol: 0.9,
      lemas_gang_unit_deployed: 0.5,
      lemas_percent_office_drug_units: 0.32,
      police_operating_budget_per_population: 0.14,
      total_violent_crimes_per_population: 0.2,
    }

    # Expected contents of the last record.
    expected_last = {
      state: 6,
      county: nil,
      community: nil,
      community_name: "Ontariocity",
      fold: 10,
      population: 0.2,
      household_size: 0.78,
      race_percent_black: 0.14,
      race_percent_white: 0.46,
      race_percent_asian: 0.24,
      race_percent_hispanic: 0.77,
      age_percent_12_to_21: 0.5,
      age_percent_12_to_29: 0.62,
      age_percent_16_to_24: 0.4,
      age_percent_65_and_upper: 0.17,
      n_people_urban: 0.21,
      percent_people_urban: 1,
      median_income: 0.4,
      percent_households_with_wage: 0.73,
      percent_households_with_farm_self: 0.22,
      percent_households_with_investment_income: 0.25,
      percent_households_with_social_security: 0.26,
      percent_households_with_public_assistant: 0.47,
      percent_households_with_retire: 0.29,
      median_family_income: 0.36,
      per_capita_income: 0.24,
      per_capita_income_white: 0.28,
      per_capita_income_black: 0.32,
      per_capita_income_indian: 0.22,
      per_capita_income_asian: 0.27,
      per_capita_income_other: 0.25,
      per_capita_income_hispanic: 0.29,
      n_people_under_poverty: 0.16,
      percent_people_under_poverty: 0.35,
      percent_less_9th_grade: 0.5,
      percent_not_high_school_graduate: 0.55,
      percent_bachelors_or_more: 0.16,
      percent_unemployed: 0.47,
      percent_employed: 0.58,
      percent_employed_manufacturing: 0.53,
      percent_employed_professional_service: 0.2,
      percent_occupations_manufacturing: 0.6,
      percent_occupations_management_professional: 0.24,
      male_percent_divorced: 0.49,
      male_percent_never_married: 0.5,
      female_percent_divorced: 0.6,
      total_percent_divorced: 0.57,
      mean_persons_per_family: 0.86,
      percent_family_2_parents: 0.61,
      percent_kids_2_parents: 0.59,
      percent_young_kids_2_parents: 0.64,
      percent_teen_2_parents: 0.6,
      percent_work_mom_young_kids: 0.35,
      percent_work_mom: 0.35,
      n_illegals: 0.11,
      percent_illegals: 0.43,
      n_immigrants: 0.2,
      percent_immigrants_recent: 0.43,
      percent_immigrants_recent_5: 0.52,
      percent_immigrants_recent_8: 0.58,
      percent_immigrants_recent_10: 0.65,
      percent_population_immigranted_recent: 0.68,
      percent_population_immigranted_recent_5: 0.73,
      percent_population_immigranted_recent_8: 0.73,
      percent_population_immigranted_recent_10: 0.75,
      percent_speak_english_only: 0.35,
      percent_not_speak_english_well: 0.77,
      percent_large_households_family: 0.84,
      percent_large_households_occupied: 0.83,
      mean_persons_per_occupied_household: 0.84,
      mean_persons_per_owner_occupied_household: 0.78,
      mean_persons_per_rental_occupied_household: 0.89,
      percent_persons_owner_occupied_household: 0.46,
      percent_persons_dense_housing: 0.76,
      percent_housing_less_3_bedrooms: 0.55,
      median_n_bedrooms: 0,
      n_vacant_households: 0.12,
      percent_housing_occupied: 0.8,
      percent_housing_owner_occupied: 0.49,
      percent_vacant_housing_boarded: 0.1,
      percent_vacant_housing_more_6_months: 0.16,
      median_year_housing_built: 0.71,
      percent_housing_no_phone: 0.32,
      percent_housing_without_full_plumbing: 0.21,
      owner_occupied_housing_lower_quartile: 0.35,
      owner_occupied_housing_median: 0.35,
      owner_occupied_housing_higher_quartile: 0.32,
      rental_housing_lower_quartile: 0.54,
      rental_housing_median: 0.53,
      rental_housing_higher_quartile: 0.61,
      median_rent: 0.54,
      median_rent_percent_household_income: 0.69,
      median_owner_cost_percent_household_income: 0.73,
      median_owner_cost_percent_household_income_no_mortgage: 0.21,
      n_people_shelter: 0.08,
      n_people_street: 0.08,
      percent_foreign_born: 0.68,
      percent_born_same_state: 0.5,
      percent_same_house_85: 0.34,
      percent_same_city_85: 0.35,
      percent_same_state_85: 0.68,
      lemas_sworn_full_time: 0.03,
      lemas_sworn_full_time_per_population: 0.11,
      lemas_sworn_full_time_field: 0.96,
      lemas_sworn_full_time_field_per_population: 0.14,
      lemas_total_requests: 0.04,
      lemas_total_requests_per_population: 0.11,
      total_requests_per_officer: 0.3,
      n_officers_per_population: 0.11,
      racial_match_community_police: 0.45,
      percent_police_white: 0.74,
      percent_police_black: 0.34,
      percent_police_hispanic: 0.07,
      percent_police_asian: 0,
      percent_police_minority: 0.28,
      n_officers_assigned_drug_units: 0.07,
      n_kinds_drugs_seized: 0.36,
      police_average_overtime_worked: 0.11,
      land_area: 0.11,
      population_density: 0.3,
      percent_use_public_transit: 0.05,
      n_police_cars: 0.08,
      n_police_operating_budget: 0.04,
      lemas_percent_police_on_patrol: 0.73,
      lemas_gang_unit_deployed: 0.5,
      lemas_percent_office_drug_units: 1,
      police_operating_budget_per_population: 0.13,
      total_violent_crimes_per_population: 0.48,
    }

    expected = [1994, expected_first, expected_last]
    actual = [records.size, records[0].to_h, records[-1].to_h]
    assert_equal(expected, actual)
  end

  sub_test_case("#metadata") do
    test("#description") do
      description = @dataset.metadata.description
      expected_prefix = "Title: Communities and Crime"
      assert do
        description.start_with?(expected_prefix)
      end
    end
  end
end
|