rboc 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rboc.rb +106 -197
- data/lib/rboc/census.rb +137 -0
- data/lib/rboc/data.rb +136 -1
- metadata +9 -9
- data/data/acs_1yr_profile_2012.xml +0 -2643
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 05e933b4d3dc2ece95ee01a0928a0f3e0a38ce6a
|
4
|
+
data.tar.gz: bb7a176c5f7e96e9486c65bf38ebb2b3af65a308
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e6117dbe696ec68046c6471d33d75014e1ed072cd2064de75abf775e15375fc69212c6d0db0a453cd21871551e17828389b384cc851ae01ab9408258f0c32508
|
7
|
+
data.tar.gz: 777e7158a44d4470518ad4d737729c4c9999a9542dd0bee37d02a5c747eca6f12711e8d77d05366d654f67b8984331d56ded9b27b1834fa21792072d9d90ea99
|
data/lib/rboc.rb
CHANGED
@@ -1,226 +1,135 @@
|
|
1
1
|
require 'curb'
|
2
2
|
require 'json'
|
3
3
|
require 'uri'
|
4
|
+
require 'set'
|
4
5
|
|
6
|
+
require 'rboc/census'
|
5
7
|
require 'rboc/geo'
|
6
8
|
require 'rboc/data'
|
7
9
|
|
8
10
|
# A module defining methods for accessing the U.S. Census data API.
|
9
11
|
#
|
12
|
+
# Census data is divided between a number of files, like the American Community Survey
|
13
|
+
# (ACS) 5 year estimates file, the ACS 3 year estimates file, and the 2010 Census
|
14
|
+
# summary file. See the {data documentation}[http://www.census.gov/developers/data/] on
|
15
|
+
# the Census website for a description of all available files.
|
16
|
+
#
|
17
|
+
# In +rboc+, the list of available files (using abbreviated names) is contained in
|
18
|
+
# +Census::DATA_SETS+. Each entry in that array corresponds to a class constant in
|
19
|
+
# +Census+ assigned to a +Census::DataSet+ instance. A DataSet object contains one or
|
20
|
+
# more DataSetVintage objects, each of which represents a particular vintage of the given survey.
|
21
|
+
# Use the DataSet#vintage_years method to see the vintage years available.
|
22
|
+
#
|
23
|
+
# Census::ACS5.vintage_years
|
24
|
+
# # => [2010, 2011, 2012]
|
25
|
+
#
|
26
|
+
# To access a particular data set vintage, use square brackets.
|
27
|
+
#
|
28
|
+
# Census::ACS5[2010].class
|
29
|
+
# # => Census::DataSetVintage
|
30
|
+
#
|
31
|
+
# To download data, use the +query+ method on a DataSet or DataSetVintage object.
|
32
|
+
# Calling #query on the containing DataSet is the same as calling #query on the most
|
33
|
+
# recent vintage year.
|
34
|
+
#
|
35
|
+
# Census::ACS5.query(q=Census::Query.new) {|q| ...}
|
36
|
+
# # returns data for most recent vintage year
|
37
|
+
# Census::ACS5[2010].query(q=Census::Query.new) {|q| ...}
|
38
|
+
# # returns data for 2010 vintage year
|
39
|
+
#
|
40
|
+
# If a block is passed it is called on the Census::Query argument. Queries return
|
41
|
+
# Census::ResultSet. For each file there is also a "raw" query method with the same
|
42
|
+
# signature:
|
43
|
+
#
|
44
|
+
# Census::ACS5.query_raw(q=Census::Query.new) {|q| ...}
|
45
|
+
#
|
46
|
+
# The raw version returns the unmodified response string, which gives the requested
|
47
|
+
# data in JSON format. Note, however, that +#query_raw+ will raise an error if you try to
|
48
|
+
# download more than 50 variables (this is a restriction of the Census API). #query
|
49
|
+
# will break your request into chunks and merge them into a single response object.
|
50
|
+
#
|
51
|
+
# Examples:
|
52
|
+
#
|
53
|
+
# # In the following examples I assume the user has installed a key locally, so a
|
54
|
+
# # key is not specified in query parameters.
|
55
|
+
#
|
56
|
+
# # Create a query to request the total population for each county in Iowa.
|
57
|
+
# require 'rboc'
|
58
|
+
# my_q = Census::Query.new
|
59
|
+
# my_q.variables = ['B00001_001E'] # this needs to be an array
|
60
|
+
# my_q.geo.summary_level = 'county'
|
61
|
+
# my_q.geo.contained_in = { 'state' => 19 }
|
62
|
+
#
|
63
|
+
# # Pass the query to an appropriate Census file, examine the returned column names, and
|
64
|
+
# # iterate over the results.
|
65
|
+
# result = Census::ACS5.query my_q
|
66
|
+
# result.colnames
|
67
|
+
# # => ["B00001_001E", "state", "county"]
|
68
|
+
# result.each {|row| p row}
|
69
|
+
# # {"B00001_001E" => "1461", "state" => "19", "county" => "001"}
|
70
|
+
# # {"B00001_001E" => "823", "state" => "19", "county" => "003"}
|
71
|
+
# # ...
|
72
|
+
#
|
73
|
+
# # You can also iterate over rows without column names
|
74
|
+
# result.rows.each {|row| p row}
|
75
|
+
# # ["1461", "19", "001"]
|
76
|
+
# # ["823", "19", "003"]
|
77
|
+
# # ...
|
78
|
+
#
|
79
|
+
# # You can use a block to set query parameters.
|
80
|
+
# result2 = Census::ACS5.query do |q|
|
81
|
+
# q.variables = ['B00001_001E']
|
82
|
+
# q.geo.summary_level = 'county'
|
83
|
+
# q.geo.contained_in = { 'state' => 19 }
|
84
|
+
# end
|
85
|
+
# result2 == result
|
86
|
+
# # => true
|
87
|
+
#
|
88
|
+
# # There is a second, chainable syntax for defining query parameters that
|
89
|
+
# # is convenient for one-liners.
|
90
|
+
# result3 = Census::ACS5.query {|q| q.get('B00001_001E').for('county').in('state' => 19)}
|
91
|
+
# result3 == result
|
92
|
+
# # => true
|
93
|
+
#
|
10
94
|
module Census
|
11
95
|
|
12
96
|
# Base URL of the Census data API.
|
13
97
|
#
|
14
98
|
API_URL = 'http://api.census.gov/data'
|
15
99
|
|
16
|
-
#
|
17
|
-
#
|
18
|
-
INSTALLED_KEY_REL_PATH = '../data/installed_key'
|
19
|
-
|
20
|
-
# Path to the installed API key.
|
100
|
+
# Where to store local data
|
21
101
|
#
|
22
|
-
|
102
|
+
LOCAL_DATA_DIR = File.join ENV['HOME'], '.census'
|
23
103
|
|
24
|
-
#
|
104
|
+
# Where cached responses from the Census API are stored. Only data descriptions are cached.
|
25
105
|
#
|
26
|
-
|
106
|
+
CACHE_DIR = File.join LOCAL_DATA_DIR, 'cache'
|
27
107
|
|
28
|
-
#
|
108
|
+
# Path to the installed API key.
|
29
109
|
#
|
30
|
-
|
31
|
-
'acs1' => [2012],
|
32
|
-
'acs1_cd' => [2011],
|
33
|
-
'acs3' => [2012],
|
34
|
-
'acs5' => [2011, 2010],
|
35
|
-
'sf1' => [2010, 2000, 1990],
|
36
|
-
'sf3' => [2000, 1990]
|
37
|
-
}
|
38
|
-
|
39
|
-
FILE_URL_SUBST = {
|
40
|
-
'acs1' => 'acs1/profile',
|
41
|
-
'acs3' => 'acs3/profile'
|
42
|
-
}
|
43
|
-
|
44
|
-
class CensusApiError < StandardError; end
|
45
|
-
class InvalidQueryError < CensusApiError; end
|
46
|
-
class InvalidKeyError < CensusApiError; end
|
47
|
-
class NoMatchingRecordsError < CensusApiError; end
|
48
|
-
class ServerSideError < CensusApiError; end
|
110
|
+
INSTALLED_KEY_PATH = File.join LOCAL_DATA_DIR, 'installed_key'
|
49
111
|
|
50
|
-
#
|
112
|
+
# URL of the Census data discovery API
|
51
113
|
#
|
52
|
-
|
53
|
-
attr_accessor :variables, :geo
|
54
|
-
|
55
|
-
def initialize
|
56
|
-
@variables = []
|
57
|
-
@geo = Geography.new
|
58
|
-
end
|
59
|
-
|
60
|
-
def api_key=(key)
|
61
|
-
@api_key = key
|
62
|
-
end
|
114
|
+
DATA_DISCOVERY_URL = 'http://api.census.gov/data.json'
|
63
115
|
|
64
|
-
|
65
|
-
|
66
|
-
#
|
67
|
-
def api_key
|
68
|
-
@api_key ||= Census.installed_key
|
69
|
-
end
|
70
|
-
|
71
|
-
# these chainable methods mirror the field names in the HTTP get string
|
116
|
+
self.setup_local_directory!
|
117
|
+
data_sets = JSON.parse self.get_cached_url(DATA_DISCOVERY_URL)
|
72
118
|
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
def for(level)
|
79
|
-
@geo.summary_level = level
|
80
|
-
self
|
81
|
-
end
|
82
|
-
|
83
|
-
def in(container)
|
84
|
-
@geo.contained_in = container
|
85
|
-
self
|
86
|
-
end
|
87
|
-
|
88
|
-
def key(key)
|
89
|
-
@api_key = key
|
90
|
-
self
|
91
|
-
end
|
119
|
+
# extract unique file names and valid years
|
120
|
+
data_names = Set.new
|
121
|
+
data_sets.each do |d|
|
122
|
+
name = d['c_dataset'].join('_').upcase
|
92
123
|
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
q.geo = @geo
|
101
|
-
q.api_key = @api_key
|
102
|
-
q
|
124
|
+
if data_names.include? name
|
125
|
+
self.const_get(name).add_vintage DataSetVintage.new(d)
|
126
|
+
else
|
127
|
+
data_names << name
|
128
|
+
ds = DataSet.new
|
129
|
+
ds.add_vintage DataSetVintage.new(d)
|
130
|
+
self.const_set name, ds
|
103
131
|
end
|
104
|
-
|
105
|
-
def to_hash
|
106
|
-
h = {}
|
107
|
-
h['key'] = self.api_key
|
108
|
-
h.merge! geo.to_hash
|
109
|
-
|
110
|
-
v = @variables
|
111
|
-
v = v.join(',') if v.is_a? Array
|
112
|
-
h['get'] = v
|
113
|
-
|
114
|
-
h
|
115
|
-
end
|
116
|
-
|
117
|
-
# Returns the query portion of the API GET string.
|
118
|
-
#
|
119
|
-
def to_s
|
120
|
-
URI.encode_www_form self.to_hash
|
121
|
-
end
|
122
|
-
end
|
123
|
-
|
124
|
-
class <<self
|
125
|
-
|
126
|
-
# Writes the given key to a local file. If a key is installed, then you don't have to specify
|
127
|
-
# a key in your query.
|
128
|
-
#
|
129
|
-
def install_key!(key)
|
130
|
-
File.open INSTALLED_KEY_PATH, 'w' do |f|
|
131
|
-
f.write key
|
132
|
-
end
|
133
|
-
end
|
134
|
-
|
135
|
-
def installed_key
|
136
|
-
if File.exists? INSTALLED_KEY_PATH
|
137
|
-
File.read INSTALLED_KEY_PATH
|
138
|
-
else
|
139
|
-
nil
|
140
|
-
end
|
141
|
-
end
|
142
|
-
|
143
|
-
# Constructs the URL needed to perform the query on the given file.
|
144
|
-
#
|
145
|
-
def api_url(year, file, url_file, query)
|
146
|
-
year = year.to_i
|
147
|
-
unless FILE_VALID_YEARS[file].include? year
|
148
|
-
raise ArgumentError, "Invalid year '#{year}' for file '#{file}'"
|
149
|
-
end
|
150
|
-
|
151
|
-
url_file ||= file
|
152
|
-
yield query if block_given?
|
153
|
-
[API_URL, year.to_s, "#{url_file}?#{query.to_s}"].join('/')
|
154
|
-
end
|
155
|
-
|
156
|
-
# Accesses the data api and returns the unmodified body of the HTTP response. Raises errors
|
157
|
-
# if the HTTP response code indicates a problem.
|
158
|
-
#
|
159
|
-
def api_raw(year, file, url_file, query)
|
160
|
-
yield query if block_given?
|
161
|
-
url = api_url year, file, url_file, query
|
162
|
-
puts "GET #{url}"
|
163
|
-
|
164
|
-
c = Curl::Easy.new url
|
165
|
-
c.perform
|
166
|
-
r = c.response_code
|
167
|
-
|
168
|
-
if r == 200
|
169
|
-
return c.body_str
|
170
|
-
elsif r == 400
|
171
|
-
raise InvalidQueryError
|
172
|
-
elsif r == 204
|
173
|
-
raise NoMatchingRecordsError
|
174
|
-
elsif r == 500
|
175
|
-
raise ServerSideError
|
176
|
-
elsif r == 302 && (c.head.include?("missing_key") || c.head.include?("invalid_key"))
|
177
|
-
raise InvalidKeyError
|
178
|
-
else
|
179
|
-
raise CensusApiError, "Unexpected HTTP response code: #{r}"
|
180
|
-
end
|
181
|
-
end
|
182
|
-
|
183
|
-
# Accesses the the data api and parses the result into a Census::Data object.
|
184
|
-
#
|
185
|
-
def api_data(year, file, url_file, query)
|
186
|
-
yield query if block_given?
|
187
|
-
|
188
|
-
# download the first 50 or fewer variables
|
189
|
-
json = api_raw year, file, url_file, query[0...50]
|
190
|
-
d = Data.new json
|
191
|
-
|
192
|
-
# download remaining variables 50 at a time
|
193
|
-
offset = 50
|
194
|
-
while offset <= query.variables.length
|
195
|
-
json = api_raw year, file, url_file, query[offset...(offset+50)]
|
196
|
-
json = JSON.parse json
|
197
|
-
|
198
|
-
# sometimes the API returns a descriptive hash (in a single element array) if the
|
199
|
-
# requested columns are invalid
|
200
|
-
raise InvalidQueryError if json.first.is_a? Hash
|
201
|
-
|
202
|
-
d.merge! json
|
203
|
-
offset += 50
|
204
|
-
end
|
205
|
-
|
206
|
-
d
|
207
|
-
end
|
208
|
-
|
209
|
-
end
|
210
|
-
|
211
|
-
def self.api_call(file, url_file)
|
212
|
-
|
213
|
-
define_singleton_method file do |year: FILE_VALID_YEARS[file].first, query: Query.new, &block|
|
214
|
-
api_data year, file, url_file, query, &block
|
215
|
-
end
|
216
|
-
|
217
|
-
define_singleton_method(file+'_raw') do |year: FILE_VALID_YEARS[file].first, query: Query.new, &block|
|
218
|
-
api_raw year, file, url_file, query, &block
|
219
|
-
end
|
220
|
-
end
|
221
|
-
|
222
|
-
FILES.each do |f|
|
223
|
-
self.api_call f, FILE_URL_SUBST[f]
|
224
132
|
end
|
225
133
|
|
134
|
+
DATA_SETS = data_names.sort
|
226
135
|
end
|
data/lib/rboc/census.rb
ADDED
@@ -0,0 +1,137 @@
|
|
1
|
+
module Census
|
2
|
+
class CensusApiError < StandardError; end
|
3
|
+
class InvalidQueryError < CensusApiError; end
|
4
|
+
class InvalidKeyError < CensusApiError; end
|
5
|
+
class NoMatchingRecordsError < CensusApiError; end
|
6
|
+
class ServerSideError < CensusApiError; end
|
7
|
+
|
8
|
+
# A class representing a query to the Census API.
|
9
|
+
#
|
10
|
+
class Query
|
11
|
+
attr_accessor :variables, :geo
|
12
|
+
|
13
|
+
def initialize
|
14
|
+
@variables = []
|
15
|
+
@geo = Geography.new
|
16
|
+
end
|
17
|
+
|
18
|
+
def api_key=(key)
|
19
|
+
@api_key = key
|
20
|
+
end
|
21
|
+
|
22
|
+
# Returns the API key to be used for this query. If the key hasn't been set explicitly, this
|
23
|
+
# method attempts to load a key previously installed by Census.install_key!.
|
24
|
+
#
|
25
|
+
def api_key
|
26
|
+
@api_key ||= Census.installed_key
|
27
|
+
end
|
28
|
+
|
29
|
+
# these chainable methods mirror the field names in the HTTP get string
|
30
|
+
|
31
|
+
def get(*vars)
|
32
|
+
@variables = vars
|
33
|
+
self
|
34
|
+
end
|
35
|
+
|
36
|
+
def for(level)
|
37
|
+
@geo.summary_level = level
|
38
|
+
self
|
39
|
+
end
|
40
|
+
|
41
|
+
def in(container)
|
42
|
+
@geo.contained_in = container
|
43
|
+
self
|
44
|
+
end
|
45
|
+
|
46
|
+
def key(key)
|
47
|
+
@api_key = key
|
48
|
+
self
|
49
|
+
end
|
50
|
+
|
51
|
+
# Constructs a new Query object with a subset of variables. Creates a shallow copy of this
|
52
|
+
# Query's geography and api key.
|
53
|
+
#
|
54
|
+
def [](rng)
|
55
|
+
variables = @variables[rng]
|
56
|
+
q = Query.new
|
57
|
+
q.variables = variables
|
58
|
+
q.geo = @geo
|
59
|
+
q.api_key = @api_key
|
60
|
+
q
|
61
|
+
end
|
62
|
+
|
63
|
+
def to_hash
|
64
|
+
h = {}
|
65
|
+
h['key'] = self.api_key
|
66
|
+
h.merge! geo.to_hash
|
67
|
+
|
68
|
+
v = @variables
|
69
|
+
v = v.join(',') if v.is_a? Array
|
70
|
+
h['get'] = v
|
71
|
+
|
72
|
+
h
|
73
|
+
end
|
74
|
+
|
75
|
+
# Returns the query portion of the API GET string.
|
76
|
+
#
|
77
|
+
def to_s
|
78
|
+
URI.encode_www_form self.to_hash
|
79
|
+
end
|
80
|
+
end
|
81
|
+
|
82
|
+
class <<self
|
83
|
+
|
84
|
+
# Set up the local directory where cached data and the installed key will be stored.
|
85
|
+
def setup_local_directory!
|
86
|
+
unless Dir.exists? LOCAL_DATA_DIR
|
87
|
+
Dir.mkdir LOCAL_DATA_DIR
|
88
|
+
end
|
89
|
+
|
90
|
+
unless Dir.exists? CACHE_DIR
|
91
|
+
Dir.mkdir CACHE_DIR
|
92
|
+
end
|
93
|
+
|
94
|
+
end
|
95
|
+
|
96
|
+
# Writes the given key to a local file. If a key is installed, then you don't have to specify
|
97
|
+
# a key in your query.
|
98
|
+
#
|
99
|
+
def install_key!(key)
|
100
|
+
File.open INSTALLED_KEY_PATH, 'w' do |f|
|
101
|
+
f.write key
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
# Returns the currently installed key or +nil+ if no key is installed.
|
106
|
+
#
|
107
|
+
def installed_key
|
108
|
+
if File.exists? INSTALLED_KEY_PATH
|
109
|
+
File.read INSTALLED_KEY_PATH
|
110
|
+
else
|
111
|
+
nil
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
# Looks for the url basename in the cache directory. If it doesn't exist, then downloads the
|
116
|
+
# file from the web.
|
117
|
+
#
|
118
|
+
def get_cached_url(url)
|
119
|
+
local_file = File.join CACHE_DIR, File.basename(url)
|
120
|
+
if File.exists? local_file
|
121
|
+
File.read local_file
|
122
|
+
else
|
123
|
+
puts "Getting #{url}"
|
124
|
+
c = Curl::Easy.new url
|
125
|
+
c.perform
|
126
|
+
file_content = c.body_str
|
127
|
+
|
128
|
+
File.open local_file, 'w' do |f|
|
129
|
+
f.write file_content
|
130
|
+
end
|
131
|
+
|
132
|
+
file_content
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
end
|
137
|
+
end
|