red-datasets 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,4 @@
1
module Datasets
  # Library-specific error type, namespaced under Datasets.
  #
  # It derives from StandardError, so a bare `rescue` (or
  # `rescue StandardError`) catches it as well as `rescue Datasets::Error`.
  class Error < StandardError
  end
end
@@ -2,8 +2,6 @@ require 'zlib'
2
2
 
3
3
  require_relative "dataset"
4
4
 
5
- class SetTypeError < StandardError; end
6
-
7
5
  module Datasets
8
6
  class MNIST < Dataset
9
7
  BASE_URL = "http://yann.lecun.com/exdb/mnist/"
@@ -0,0 +1,125 @@
1
require "csv"

require_relative "dataset"
2
+
3
+ module Datasets
4
# Raw, per-species measurement tables used to build the Penguins dataset.
#
# Each species class reads one CSV file, downloading it to the cache
# directory first when it is not there yet, and yields one Record per row.
module PenguinsRawData
  # One row of a raw species CSV.
  #
  # Members are filled positionally from the row's fields, so their order
  # has to match the column order of the downloaded files.
  Record = Struct.new(:study_name,
                      :sample_number,
                      :species,
                      :region,
                      :island,
                      :stage,
                      :individual_id,
                      :clutch_completion,
                      :date_egg,
                      :culmen_length_mm,
                      :culmen_depth_mm,
                      :flipper_length_mm,
                      :body_mass_g,
                      :sex,
                      :delta_15_n_permil,
                      :delta_13_c_permil,
                      :comments)

  # Shared loader for a single species.
  #
  # Subclasses only provide the +URL+ constant (plus a +DOI+ for reference).
  # The lower-cased class name is used both in the metadata id and as the
  # cache file name.
  class SpeciesBase < Dataset
    def initialize
      super
      species = self.class.name.split("::").last.downcase
      @metadata.id = "palmerpenguins-raw-#{species}"
      @metadata.url = self.class::URL
      @metadata.licenses = ["CC0"]
      @data_path = cache_dir_path + "penguins" + "#{species}.csv"
    end

    # Location of the cached CSV file for this species.
    attr_reader :data_path

    # Yields a Record for every CSV row whose first column is present.
    #
    # @yieldparam record [Record] the row's fields in column order
    # @return [Enumerator] when no block is given
    def each
      return to_enum(__method__) unless block_given?

      open_data do |csv|
        csv.each do |row|
          # Rows without a value in the first column are skipped.
          next if row[0].nil?

          yield Record.new(*row.fields)
        end
      end
    end

    # Opens the cached CSV (downloading it first if it is missing) and
    # yields the CSV object. The first row is treated as the header and
    # all built-in CSV converters are applied to the field values.
    private def open_data
      download unless data_path.exist?
      CSV.open(data_path, headers: :first_row, converters: :all) do |csv|
        yield csv
      end
    end

    # Delegates to the inherited download with this species' cache path
    # and source URL.
    private def download
      super(data_path, metadata.url)
    end
  end

  # Adelie penguin data from: https://doi.org/10.6073/pasta/abc50eed9138b75f54eaada0841b9b86
  class Adelie < SpeciesBase
    DOI = "doi.org/10.6073/pasta/abc50eed9138b75f54eaada0841b9b86".freeze
    URL = "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-pal.219.3&entityid=002f3893385f710df69eeebe893144ff".freeze
  end

  # Gentoo penguin data from: https://doi.org/10.6073/pasta/2b1cff60f81640f182433d23e68541ce
  class Gentoo < SpeciesBase
    DOI = "doi.org/10.6073/pasta/2b1cff60f81640f182433d23e68541ce".freeze
    URL = "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-pal.220.3&entityid=e03b43c924f226486f2f0ab6709d2381".freeze
  end

  # Chinstrap penguin data from: https://doi.org/10.6073/pasta/409c808f8fc9899d02401bdb04580af7
  class Chinstrap < SpeciesBase
    DOI = "doi.org/10.6073/pasta/409c808f8fc9899d02401bdb04580af7".freeze
    URL = "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-pal.221.2&entityid=fe853aa8f7a59aa84cdd3197619ef462".freeze
  end
end
77
+
78
# This dataset provides the same dataset as https://github.com/allisonhorst/palmerpenguins
#
# It walks the three raw species tables in PenguinsRawData (Adelie, Gentoo,
# Chinstrap, in that order) and converts every raw record into the smaller
# Record defined here.
class Penguins < Dataset
  # Simplified record; see #convert_record for how each member is derived
  # from a raw record.
  Record = Struct.new(:species,
                      :island,
                      :bill_length_mm,
                      :bill_depth_mm,
                      :flipper_length_mm,
                      :body_mass_g,
                      :sex,
                      :year)

  def initialize
    super
    @metadata.id = "palmerpenguins"
    @metadata.name = "palmerpenguins"
    @metadata.url = "https://allisonhorst.github.io/palmerpenguins/"
    @metadata.licenses = ["CC0"]
    @metadata.description = "A great dataset for data exploration & visualization, as an alternative to iris"
  end

  # Yields one converted Record per raw record, species by species.
  #
  # @yieldparam record [Record]
  # @return [Enumerator] when no block is given
  def each
    return to_enum(__method__) unless block_given?

    species_classes = [
      PenguinsRawData::Adelie,
      PenguinsRawData::Gentoo,
      PenguinsRawData::Chinstrap,
    ]

    species_classes.each do |species_class|
      species_class.new.each do |raw_record|
        yield convert_record(raw_record)
      end
    end
  end

  # Builds a Record from a raw species record:
  # * species keeps only the first whitespace-separated word of the raw value
  # * bill length/depth are taken from the raw culmen length/depth
  # * flipper length and body mass are converted with to_i when present
  # * sex is downcased when present
  # * year is the year of the raw date_egg value when present
  private def convert_record(raw_record)
    Record.new(raw_record.species.split(" ").first,
               raw_record.island,
               raw_record.culmen_length_mm,
               raw_record.culmen_depth_mm,
               raw_record.flipper_length_mm&.to_i,
               raw_record.body_mass_g&.to_i,
               raw_record.sex&.downcase,
               raw_record.date_egg&.year)
  end
end
125
+ end
@@ -1,3 +1,3 @@
1
1
  module Datasets
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
data/red-datasets.gemspec CHANGED
@@ -35,6 +35,7 @@ Gem::Specification.new do |spec|
35
35
  spec.test_files += Dir.glob("test/**/*")
36
36
 
37
37
  spec.add_runtime_dependency("csv", ">= 3.0.5")
38
+ spec.add_runtime_dependency("rexml")
38
39
  spec.add_runtime_dependency("rubyzip")
39
40
 
40
41
  spec.add_development_dependency("bundler")
data/test/run-test.rb CHANGED
@@ -13,4 +13,6 @@ $LOAD_PATH.unshift(lib_dir.to_s)
13
13
 
14
14
  require_relative "helper"
15
15
 
16
+ ARGV.unshift("--max-diff-target-string-size=#{10 * 1024}")
17
+
16
18
  exit(Test::Unit::AutoRunner.run(true, test_dir.to_s))
@@ -0,0 +1,180 @@
1
# Checks the locales produced by Datasets::CLDRPlurals and its metadata.
class CLDRPluralsTest < Test::Unit::TestCase
  def setup
    @dataset = Datasets::CLDRPlurals.new
  end

  # Shorthand for building an expected Locale.
  def locale(*args)
    Datasets::CLDRPlurals::Locale.new(*args)
  end

  # Shorthand for building an expected Rule.
  def rule(*args)
    Datasets::CLDRPlurals::Rule.new(*args)
  end

  test("#each") do
    all_locales = @dataset.each.to_a

    # Expected first locale.
    expected_bm =
      locale("bm",
             [
               rule("other",
                    nil,
                    [0..15, 100, 1000, 10000, 100000, 1000000, :elipsis],
                    [0.0..1.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, :elipsis]),
             ])

    # Expected fourth-from-last locale.
    expected_kw =
      locale("kw",
             [
               rule("zero",
                    [:equal, "n", [0]],
                    [0],
                    [0.0, 0.00, 0.000, 0.0000]),
               rule("one",
                    [:equal, "n", [1]],
                    [1],
                    [1.0, 1.00, 1.000, 1.0000]),
               rule("two",
                    [:or,
                     [:equal, [:mod, "n", 100], [2, 22, 42, 62, 82]],
                     [:and,
                      [:equal, [:mod, "n", 1000], [0]],
                      [:equal, [:mod, "n", 100000], [1000..20000, 40000, 60000, 80000]]],
                     [:and,
                      [:not_equal, "n", [0]],
                      [:equal, [:mod, "n", 1000000], [100000]]]],
                    [2, 22, 42, 62, 82, 102, 122, 142, 1000, 10000, 100000, :elipsis],
                    [2.0, 22.0, 42.0, 62.0, 82.0, 102.0, 122.0, 142.0, 1000.0, 10000.0, 100000.0, :elipsis]),
               rule("few",
                    [:equal, [:mod, "n", 100], [3, 23, 43, 63, 83]],
                    [3, 23, 43, 63, 83, 103, 123, 143, 1003, :elipsis],
                    [3.0, 23.0, 43.0, 63.0, 83.0, 103.0, 123.0, 143.0, 1003.0, :elipsis]),
               rule("many",
                    [:and,
                     [:not_equal, "n", [1]],
                     [:equal, [:mod, "n", 100], [1, 21, 41, 61, 81]]],
                    [21, 41, 61, 81, 101, 121, 141, 161, 1001, :elipsis],
                    [21.0, 41.0, 61.0, 81.0, 101.0, 121.0, 141.0, 161.0, 1001.0, :elipsis]),
               rule("other",
                    nil,
                    [4..19, 100, 1004, 1000000, :elipsis],
                    [0.1..0.9, 1.1..1.7, 10.0, 100.0, 1000.1, 1000000.0, :elipsis]),
             ])

    assert_equal([215, expected_bm, expected_kw],
                 [all_locales.size, all_locales.first, all_locales[-4]])
  end

  sub_test_case("#metadata") do
    test("#description") do
      description = @dataset.metadata.description
      assert { description.start_with?("Language plural rules in Unicode Common Locale Data Repository.") }
    end
  end
end
@@ -0,0 +1,290 @@
1
# Checks the records produced by Datasets::Communities and its metadata.
class CommunitiesTest < Test::Unit::TestCase
  def setup
    @dataset = Datasets::Communities.new
  end

  # Shorthand for building a Communities record.
  def record(*args)
    Datasets::Communities::Record.new(*args)
  end

  test("#each") do
    all_records = @dataset.each.to_a

    # Expected content of the first record, as a hash.
    first_expected = {
      state: 8,
      county: nil,
      community: nil,
      community_name: "Lakewoodcity",
      fold: 1,
      population: 0.19,
      household_size: 0.33,
      race_percent_black: 0.02,
      race_percent_white: 0.9,
      race_percent_asian: 0.12,
      race_percent_hispanic: 0.17,
      age_percent_12_to_21: 0.34,
      age_percent_12_to_29: 0.47,
      age_percent_16_to_24: 0.29,
      age_percent_65_and_upper: 0.32,
      n_people_urban: 0.2,
      percent_people_urban: 1,
      median_income: 0.37,
      percent_households_with_wage: 0.72,
      percent_households_with_farm_self: 0.34,
      percent_households_with_investment_income: 0.6,
      percent_households_with_social_security: 0.29,
      percent_households_with_public_assistant: 0.15,
      percent_households_with_retire: 0.43,
      median_family_income: 0.39,
      per_capita_income: 0.4,
      per_capita_income_white: 0.39,
      per_capita_income_black: 0.32,
      per_capita_income_indian: 0.27,
      per_capita_income_asian: 0.27,
      per_capita_income_other: 0.36,
      per_capita_income_hispanic: 0.41,
      n_people_under_poverty: 0.08,
      percent_people_under_poverty: 0.19,
      percent_less_9th_grade: 0.1,
      percent_not_high_school_graduate: 0.18,
      percent_bachelors_or_more: 0.48,
      percent_unemployed: 0.27,
      percent_employed: 0.68,
      percent_employed_manufacturing: 0.23,
      percent_employed_professional_service: 0.41,
      percent_occupations_manufacturing: 0.25,
      percent_occupations_management_professional: 0.52,
      male_percent_divorced: 0.68,
      male_percent_never_married: 0.4,
      female_percent_divorced: 0.75,
      total_percent_divorced: 0.75,
      mean_persons_per_family: 0.35,
      percent_family_2_parents: 0.55,
      percent_kids_2_parents: 0.59,
      percent_young_kids_2_parents: 0.61,
      percent_teen_2_parents: 0.56,
      percent_work_mom_young_kids: 0.74,
      percent_work_mom: 0.76,
      n_illegals: 0.04,
      percent_illegals: 0.14,
      n_immigrants: 0.03,
      percent_immigrants_recent: 0.24,
      percent_immigrants_recent_5: 0.27,
      percent_immigrants_recent_8: 0.37,
      percent_immigrants_recent_10: 0.39,
      percent_population_immigranted_recent: 0.07,
      percent_population_immigranted_recent_5: 0.07,
      percent_population_immigranted_recent_8: 0.08,
      percent_population_immigranted_recent_10: 0.08,
      percent_speak_english_only: 0.89,
      percent_not_speak_english_well: 0.06,
      percent_large_households_family: 0.14,
      percent_large_households_occupied: 0.13,
      mean_persons_per_occupied_household: 0.33,
      mean_persons_per_owner_occupied_household: 0.39,
      mean_persons_per_rental_occupied_household: 0.28,
      percent_persons_owner_occupied_household: 0.55,
      percent_persons_dense_housing: 0.09,
      percent_housing_less_3_bedrooms: 0.51,
      median_n_bedrooms: 0.5,
      n_vacant_households: 0.21,
      percent_housing_occupied: 0.71,
      percent_housing_owner_occupied: 0.52,
      percent_vacant_housing_boarded: 0.05,
      percent_vacant_housing_more_6_months: 0.26,
      median_year_housing_built: 0.65,
      percent_housing_no_phone: 0.14,
      percent_housing_without_full_plumbing: 0.06,
      owner_occupied_housing_lower_quartile: 0.22,
      owner_occupied_housing_median: 0.19,
      owner_occupied_housing_higher_quartile: 0.18,
      rental_housing_lower_quartile: 0.36,
      rental_housing_median: 0.35,
      rental_housing_higher_quartile: 0.38,
      median_rent: 0.34,
      median_rent_percent_household_income: 0.38,
      median_owner_cost_percent_household_income: 0.46,
      median_owner_cost_percent_household_income_no_mortgage: 0.25,
      n_people_shelter: 0.04,
      n_people_street: 0,
      percent_foreign_born: 0.12,
      percent_born_same_state: 0.42,
      percent_same_house_85: 0.5,
      percent_same_city_85: 0.51,
      percent_same_state_85: 0.64,
      lemas_sworn_full_time: 0.03,
      lemas_sworn_full_time_per_population: 0.13,
      lemas_sworn_full_time_field: 0.96,
      lemas_sworn_full_time_field_per_population: 0.17,
      lemas_total_requests: 0.06,
      lemas_total_requests_per_population: 0.18,
      total_requests_per_officer: 0.44,
      n_officers_per_population: 0.13,
      racial_match_community_police: 0.94,
      percent_police_white: 0.93,
      percent_police_black: 0.03,
      percent_police_hispanic: 0.07,
      percent_police_asian: 0.1,
      percent_police_minority: 0.07,
      n_officers_assigned_drug_units: 0.02,
      n_kinds_drugs_seized: 0.57,
      police_average_overtime_worked: 0.29,
      land_area: 0.12,
      population_density: 0.26,
      percent_use_public_transit: 0.2,
      n_police_cars: 0.06,
      n_police_operating_budget: 0.04,
      lemas_percent_police_on_patrol: 0.9,
      lemas_gang_unit_deployed: 0.5,
      lemas_percent_office_drug_units: 0.32,
      police_operating_budget_per_population: 0.14,
      total_violent_crimes_per_population: 0.2,
    }

    # Expected content of the last record, as a hash.
    last_expected = {
      state: 6,
      county: nil,
      community: nil,
      community_name: "Ontariocity",
      fold: 10,
      population: 0.2,
      household_size: 0.78,
      race_percent_black: 0.14,
      race_percent_white: 0.46,
      race_percent_asian: 0.24,
      race_percent_hispanic: 0.77,
      age_percent_12_to_21: 0.5,
      age_percent_12_to_29: 0.62,
      age_percent_16_to_24: 0.4,
      age_percent_65_and_upper: 0.17,
      n_people_urban: 0.21,
      percent_people_urban: 1,
      median_income: 0.4,
      percent_households_with_wage: 0.73,
      percent_households_with_farm_self: 0.22,
      percent_households_with_investment_income: 0.25,
      percent_households_with_social_security: 0.26,
      percent_households_with_public_assistant: 0.47,
      percent_households_with_retire: 0.29,
      median_family_income: 0.36,
      per_capita_income: 0.24,
      per_capita_income_white: 0.28,
      per_capita_income_black: 0.32,
      per_capita_income_indian: 0.22,
      per_capita_income_asian: 0.27,
      per_capita_income_other: 0.25,
      per_capita_income_hispanic: 0.29,
      n_people_under_poverty: 0.16,
      percent_people_under_poverty: 0.35,
      percent_less_9th_grade: 0.5,
      percent_not_high_school_graduate: 0.55,
      percent_bachelors_or_more: 0.16,
      percent_unemployed: 0.47,
      percent_employed: 0.58,
      percent_employed_manufacturing: 0.53,
      percent_employed_professional_service: 0.2,
      percent_occupations_manufacturing: 0.6,
      percent_occupations_management_professional: 0.24,
      male_percent_divorced: 0.49,
      male_percent_never_married: 0.5,
      female_percent_divorced: 0.6,
      total_percent_divorced: 0.57,
      mean_persons_per_family: 0.86,
      percent_family_2_parents: 0.61,
      percent_kids_2_parents: 0.59,
      percent_young_kids_2_parents: 0.64,
      percent_teen_2_parents: 0.6,
      percent_work_mom_young_kids: 0.35,
      percent_work_mom: 0.35,
      n_illegals: 0.11,
      percent_illegals: 0.43,
      n_immigrants: 0.2,
      percent_immigrants_recent: 0.43,
      percent_immigrants_recent_5: 0.52,
      percent_immigrants_recent_8: 0.58,
      percent_immigrants_recent_10: 0.65,
      percent_population_immigranted_recent: 0.68,
      percent_population_immigranted_recent_5: 0.73,
      percent_population_immigranted_recent_8: 0.73,
      percent_population_immigranted_recent_10: 0.75,
      percent_speak_english_only: 0.35,
      percent_not_speak_english_well: 0.77,
      percent_large_households_family: 0.84,
      percent_large_households_occupied: 0.83,
      mean_persons_per_occupied_household: 0.84,
      mean_persons_per_owner_occupied_household: 0.78,
      mean_persons_per_rental_occupied_household: 0.89,
      percent_persons_owner_occupied_household: 0.46,
      percent_persons_dense_housing: 0.76,
      percent_housing_less_3_bedrooms: 0.55,
      median_n_bedrooms: 0,
      n_vacant_households: 0.12,
      percent_housing_occupied: 0.8,
      percent_housing_owner_occupied: 0.49,
      percent_vacant_housing_boarded: 0.1,
      percent_vacant_housing_more_6_months: 0.16,
      median_year_housing_built: 0.71,
      percent_housing_no_phone: 0.32,
      percent_housing_without_full_plumbing: 0.21,
      owner_occupied_housing_lower_quartile: 0.35,
      owner_occupied_housing_median: 0.35,
      owner_occupied_housing_higher_quartile: 0.32,
      rental_housing_lower_quartile: 0.54,
      rental_housing_median: 0.53,
      rental_housing_higher_quartile: 0.61,
      median_rent: 0.54,
      median_rent_percent_household_income: 0.69,
      median_owner_cost_percent_household_income: 0.73,
      median_owner_cost_percent_household_income_no_mortgage: 0.21,
      n_people_shelter: 0.08,
      n_people_street: 0.08,
      percent_foreign_born: 0.68,
      percent_born_same_state: 0.5,
      percent_same_house_85: 0.34,
      percent_same_city_85: 0.35,
      percent_same_state_85: 0.68,
      lemas_sworn_full_time: 0.03,
      lemas_sworn_full_time_per_population: 0.11,
      lemas_sworn_full_time_field: 0.96,
      lemas_sworn_full_time_field_per_population: 0.14,
      lemas_total_requests: 0.04,
      lemas_total_requests_per_population: 0.11,
      total_requests_per_officer: 0.3,
      n_officers_per_population: 0.11,
      racial_match_community_police: 0.45,
      percent_police_white: 0.74,
      percent_police_black: 0.34,
      percent_police_hispanic: 0.07,
      percent_police_asian: 0,
      percent_police_minority: 0.28,
      n_officers_assigned_drug_units: 0.07,
      n_kinds_drugs_seized: 0.36,
      police_average_overtime_worked: 0.11,
      land_area: 0.11,
      population_density: 0.3,
      percent_use_public_transit: 0.05,
      n_police_cars: 0.08,
      n_police_operating_budget: 0.04,
      lemas_percent_police_on_patrol: 0.73,
      lemas_gang_unit_deployed: 0.5,
      lemas_percent_office_drug_units: 1,
      police_operating_budget_per_population: 0.13,
      total_violent_crimes_per_population: 0.48,
    }

    assert_equal([1994, first_expected, last_expected],
                 [all_records.size, all_records.first.to_h, all_records.last.to_h])
  end

  sub_test_case("#metadata") do
    test("#description") do
      description = @dataset.metadata.description
      assert { description.start_with?("Title: Communities and Crime") }
    end
  end
end