rboc 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9e345c92733e6be5ca2b9f229b2e4992bad5c187
4
- data.tar.gz: 3008ae622663bb05437a556aab9b792cfd31a7ed
3
+ metadata.gz: 05e933b4d3dc2ece95ee01a0928a0f3e0a38ce6a
4
+ data.tar.gz: bb7a176c5f7e96e9486c65bf38ebb2b3af65a308
5
5
  SHA512:
6
- metadata.gz: 225a997c45f5cb497c948e5e9c04b53c878e01f5c5a9e671122008aadf96bcf1d3ab7ad374b378a0c044f7c41cd23f28a8d4adc3e1f92c5a7ace53783c1d76db
7
- data.tar.gz: 9e8dee4f2cc1deeaf9fdd889e01dd2075dd8aae278cdc1b656596992069ba841ea92afedd2f321bcaee8c108fd1b8202727aea95fedb24df0072de1666165a6b
6
+ metadata.gz: e6117dbe696ec68046c6471d33d75014e1ed072cd2064de75abf775e15375fc69212c6d0db0a453cd21871551e17828389b384cc851ae01ab9408258f0c32508
7
+ data.tar.gz: 777e7158a44d4470518ad4d737729c4c9999a9542dd0bee37d02a5c747eca6f12711e8d77d05366d654f67b8984331d56ded9b27b1834fa21792072d9d90ea99
data/lib/rboc.rb CHANGED
@@ -1,226 +1,135 @@
1
1
  require 'curb'
2
2
  require 'json'
3
3
  require 'uri'
4
+ require 'set'
4
5
 
6
+ require 'rboc/census'
5
7
  require 'rboc/geo'
6
8
  require 'rboc/data'
7
9
 
8
10
  # A module defining methods for accessing the U.S. Census data API.
9
11
  #
12
+ # Census data is divided between a number of files, like the American Community Survey
13
+ # (ACS) 5 year estimates file, the ACS 3 year estimates file, and the 2010 Census
14
+ # summary file. See the {data documentation}[http://www.census.gov/developers/data/] on
15
+ # the Census website for a description of all available files.
16
+ #
17
+ # In +rboc+, the list of available files (using abbreviated names) is contained in
18
+ # +Census::DATA_SETS+. Each entry in that array corresponds to a class constant in
19
+ # +Census+ assigned to a +Census::DataSet+ instance. A DataSet object contains one or
20
+ # more DataSetVintage objects, each of which represents a particular vintage of the given survey.
21
+ # Use the DataSet#vintage_years method to see the vintage years available.
22
+ #
23
+ # Census::ACS5.vintage_years
24
+ # # => [2010, 2011, 2012]
25
+ #
26
+ # To access a particular data set vintage, use square brackets.
27
+ #
28
+ # Census::ACS5[2010].class
29
+ # # => Census::DataSetVintage
30
+ #
31
+ # To download data, use the +query+ method on a DataSet or DataSetVintage object.
32
+ # Calling #query on the containing DataSet is the same as calling #query on the most
33
+ # recent vintage year.
34
+ #
35
+ # Census::ACS5.query(q=Census::Query.new) {|q| ...}
36
+ # # returns data for most recent vintage year
37
+ # Census::ACS5[2010].query(q=Census::Query.new) {|q| ...}
38
+ # # returns data for 2010 vintage year
39
+ #
40
+ # If a block is passed it is called on the Census::Query argument. Queries return
41
+ # a Census::ResultSet. For each file there is also a "raw" query method with the same
42
+ # signature:
43
+ #
44
+ # Census::ACS5.query_raw(q=Census::Query.new) {|q| ...}
45
+ #
46
+ # The raw version returns the unmodified response string, which gives the requested
47
+ # data in JSON format. Note, however, that +#query_raw+ will raise an error if you try to
48
+ # download more than 50 variables (this is a restriction of the Census API). #query
49
+ # will break your request into chunks and merge them into a single response object.
50
+ #
51
+ # Examples:
52
+ #
53
+ # # In the following examples I assume the user has installed a key locally, so a
54
+ # # key is not specified in query parameters.
55
+ #
56
+ # # Create a query to request the total population for each county in Iowa.
57
+ # require 'rboc'
58
+ # my_q = Census::Query.new
59
+ # my_q.variables = ['B00001_001E'] # this needs to be an array
60
+ # my_q.geo.summary_level = 'county'
61
+ # my_q.geo.contained_in = { 'state' => 19 }
62
+ #
63
+ # # Pass the query to an appropriate Census file, examine the returned column names, and
64
+ # # iterate over the results.
65
+ # result = Census::ACS5.query my_q
66
+ # result.colnames
67
+ # # => ["B00001_001E", "state", "county"]
68
+ # result.each {|row| p row}
69
+ # # {"B00001_001E" => "1461", "state" => "19", "county" => "001"}
70
+ # # {"B00001_001E" => "823", "state" => "19", "county" => "003"}
71
+ # # ...
72
+ #
73
+ # # You can also iterate over rows without column names
74
+ # result.rows.each {|row| p row}
75
+ # # ["1461", "19", "001"]
76
+ # # ["823", "19", "003"]
77
+ # # ...
78
+ #
79
+ # # You can use a block to set query parameters.
80
+ # result2 = Census::ACS5.query do |q|
81
+ # q.variables = ['B00001_001E']
82
+ # q.geo.summary_level = 'county'
83
+ # q.geo.contained_in = { 'state' => 19 }
84
+ # end
85
+ # result2 == result
86
+ # # => true
87
+ #
88
+ # # There is a second, chainable syntax for defining query parameters that
89
+ # # is convenient for one-liners.
90
+ # result3 = Census::ACS5.query {|q| q.get('B00001_001E').for('county').in('state' => 19)}
91
+ # result3 == result
92
+ # # => true
93
+ #
10
94
  module Census
11
95
 
12
96
  # Base URL of the Census data API.
13
97
  #
14
98
  API_URL = 'http://api.census.gov/data'
15
99
 
16
- # Path to the installed API key relative to this file.
17
- #
18
- INSTALLED_KEY_REL_PATH = '../data/installed_key'
19
-
20
- # Path to the installed API key.
100
+ # Where to store local data
21
101
  #
22
- INSTALLED_KEY_PATH = File.join(File.dirname(File.expand_path(__FILE__)), INSTALLED_KEY_REL_PATH)
102
+ LOCAL_DATA_DIR = File.join ENV['HOME'], '.census'
23
103
 
24
- # Data files accessible through the Census API.
104
+ # Where cached responses from the Census API are stored. Only data descriptions are cached.
25
105
  #
26
- FILES = ['acs1', 'acs1_cd', 'acs3', 'acs5', 'sf1', 'sf3']
106
+ CACHE_DIR = File.join LOCAL_DATA_DIR, 'cache'
27
107
 
28
- # List valid years of data for each data file.
108
+ # Path to the installed API key.
29
109
  #
30
- FILE_VALID_YEARS = {
31
- 'acs1' => [2012],
32
- 'acs1_cd' => [2011],
33
- 'acs3' => [2012],
34
- 'acs5' => [2011, 2010],
35
- 'sf1' => [2010, 2000, 1990],
36
- 'sf3' => [2000, 1990]
37
- }
38
-
39
- FILE_URL_SUBST = {
40
- 'acs1' => 'acs1/profile',
41
- 'acs3' => 'acs3/profile'
42
- }
43
-
44
- class CensusApiError < StandardError; end
45
- class InvalidQueryError < CensusApiError; end
46
- class InvalidKeyError < CensusApiError; end
47
- class NoMatchingRecordsError < CensusApiError; end
48
- class ServerSideError < CensusApiError; end
110
+ INSTALLED_KEY_PATH = File.join LOCAL_DATA_DIR, 'installed_key'
49
111
 
50
- # A class representing a query to the Census API.
112
+ # URL of the Census data discovery API
51
113
  #
52
- class Query
53
- attr_accessor :variables, :geo
54
-
55
- def initialize
56
- @variables = []
57
- @geo = Geography.new
58
- end
59
-
60
- def api_key=(key)
61
- @api_key = key
62
- end
114
+ DATA_DISCOVERY_URL = 'http://api.census.gov/data.json'
63
115
 
64
- # Returns the API key to be used for this query. If the key hasn't been set explicitly, this
65
- # method attempts to load a key previously installed by Census#install_key!.
66
- #
67
- def api_key
68
- @api_key ||= Census.installed_key
69
- end
70
-
71
- # these chainable methods mirror the field names in the HTTP get string
116
+ self.setup_local_directory!
117
+ data_sets = JSON.parse self.get_cached_url(DATA_DISCOVERY_URL)
72
118
 
73
- def get(*vars)
74
- @variables = vars
75
- self
76
- end
77
-
78
- def for(level)
79
- @geo.summary_level = level
80
- self
81
- end
82
-
83
- def in(container)
84
- @geo.contained_in = container
85
- self
86
- end
87
-
88
- def key(key)
89
- @api_key = key
90
- self
91
- end
119
+ # extract unique file names and valid years
120
+ data_names = Set.new
121
+ data_sets.each do |d|
122
+ name = d['c_dataset'].join('_').upcase
92
123
 
93
- # Constructs a new Query object with a subset of variables. Creates a shallow copy of this
94
- # Query's geography and api key.
95
- #
96
- def [](rng)
97
- variables = @variables[rng]
98
- q = Query.new
99
- q.variables = variables
100
- q.geo = @geo
101
- q.api_key = @api_key
102
- q
124
+ if data_names.include? name
125
+ self.const_get(name).add_vintage DataSetVintage.new(d)
126
+ else
127
+ data_names << name
128
+ ds = DataSet.new
129
+ ds.add_vintage DataSetVintage.new(d)
130
+ self.const_set name, ds
103
131
  end
104
-
105
- def to_hash
106
- h = {}
107
- h['key'] = self.api_key
108
- h.merge! geo.to_hash
109
-
110
- v = @variables
111
- v = v.join(',') if v.is_a? Array
112
- h['get'] = v
113
-
114
- h
115
- end
116
-
117
- # Returns the query portion of the API GET string.
118
- #
119
- def to_s
120
- URI.encode_www_form self.to_hash
121
- end
122
- end
123
-
124
- class <<self
125
-
126
- # Writes the given key to a local file. If a key is installed, then you don't have to specify
127
- # a key in your query.
128
- #
129
- def install_key!(key)
130
- File.open INSTALLED_KEY_PATH, 'w' do |f|
131
- f.write key
132
- end
133
- end
134
-
135
- def installed_key
136
- if File.exists? INSTALLED_KEY_PATH
137
- File.read INSTALLED_KEY_PATH
138
- else
139
- nil
140
- end
141
- end
142
-
143
- # Constructs the URL needed to perform the query on the given file.
144
- #
145
- def api_url(year, file, url_file, query)
146
- year = year.to_i
147
- unless FILE_VALID_YEARS[file].include? year
148
- raise ArgumentError, "Invalid year '#{year}' for file '#{file}'"
149
- end
150
-
151
- url_file ||= file
152
- yield query if block_given?
153
- [API_URL, year.to_s, "#{url_file}?#{query.to_s}"].join('/')
154
- end
155
-
156
- # Accesses the data api and returns the unmodified body of the HTTP response. Raises errors
157
- # if the HTTP response code indicates a problem.
158
- #
159
- def api_raw(year, file, url_file, query)
160
- yield query if block_given?
161
- url = api_url year, file, url_file, query
162
- puts "GET #{url}"
163
-
164
- c = Curl::Easy.new url
165
- c.perform
166
- r = c.response_code
167
-
168
- if r == 200
169
- return c.body_str
170
- elsif r == 400
171
- raise InvalidQueryError
172
- elsif r == 204
173
- raise NoMatchingRecordsError
174
- elsif r == 500
175
- raise ServerSideError
176
- elsif r == 302 && (c.head.include?("missing_key") || c.head.include?("invalid_key"))
177
- raise InvalidKeyError
178
- else
179
- raise CensusApiError, "Unexpected HTTP response code: #{r}"
180
- end
181
- end
182
-
183
- # Accesses the the data api and parses the result into a Census::Data object.
184
- #
185
- def api_data(year, file, url_file, query)
186
- yield query if block_given?
187
-
188
- # download the first 50 or fewer variables
189
- json = api_raw year, file, url_file, query[0...50]
190
- d = Data.new json
191
-
192
- # download remaining variables 50 at a time
193
- offset = 50
194
- while offset <= query.variables.length
195
- json = api_raw year, file, url_file, query[offset...(offset+50)]
196
- json = JSON.parse json
197
-
198
- # sometimes the API returns a descriptive hash (in a single element array) if the
199
- # requested columns are invalid
200
- raise InvalidQueryError if json.first.is_a? Hash
201
-
202
- d.merge! json
203
- offset += 50
204
- end
205
-
206
- d
207
- end
208
-
209
- end
210
-
211
- def self.api_call(file, url_file)
212
-
213
- define_singleton_method file do |year: FILE_VALID_YEARS[file].first, query: Query.new, &block|
214
- api_data year, file, url_file, query, &block
215
- end
216
-
217
- define_singleton_method(file+'_raw') do |year: FILE_VALID_YEARS[file].first, query: Query.new, &block|
218
- api_raw year, file, url_file, query, &block
219
- end
220
- end
221
-
222
- FILES.each do |f|
223
- self.api_call f, FILE_URL_SUBST[f]
224
132
  end
225
133
 
134
+ DATA_SETS = data_names.sort
226
135
  end
@@ -0,0 +1,137 @@
1
+ module Census
2
+ class CensusApiError < StandardError; end
3
+ class InvalidQueryError < CensusApiError; end
4
+ class InvalidKeyError < CensusApiError; end
5
+ class NoMatchingRecordsError < CensusApiError; end
6
+ class ServerSideError < CensusApiError; end
7
+
8
+ # A class representing a query to the Census API.
9
+ #
10
+ class Query
11
+ attr_accessor :variables, :geo
12
+
13
+ def initialize
14
+ @variables = []
15
+ @geo = Geography.new
16
+ end
17
+
18
+ def api_key=(key)
19
+ @api_key = key
20
+ end
21
+
22
+ # Returns the API key to be used for this query. If the key hasn't been set explicitly, this
23
+ # method attempts to load a key previously installed by Census.install_key!.
24
+ #
25
+ def api_key
26
+ @api_key ||= Census.installed_key
27
+ end
28
+
29
+ # these chainable methods mirror the field names in the HTTP get string
30
+
31
+ def get(*vars)
32
+ @variables = vars
33
+ self
34
+ end
35
+
36
+ def for(level)
37
+ @geo.summary_level = level
38
+ self
39
+ end
40
+
41
+ def in(container)
42
+ @geo.contained_in = container
43
+ self
44
+ end
45
+
46
+ def key(key)
47
+ @api_key = key
48
+ self
49
+ end
50
+
51
+ # Constructs a new Query object with a subset of variables. Creates a shallow copy of this
52
+ # Query's geography and api key.
53
+ #
54
+ def [](rng)
55
+ variables = @variables[rng]
56
+ q = Query.new
57
+ q.variables = variables
58
+ q.geo = @geo
59
+ q.api_key = @api_key
60
+ q
61
+ end
62
+
63
+ def to_hash
64
+ h = {}
65
+ h['key'] = self.api_key
66
+ h.merge! geo.to_hash
67
+
68
+ v = @variables
69
+ v = v.join(',') if v.is_a? Array
70
+ h['get'] = v
71
+
72
+ h
73
+ end
74
+
75
+ # Returns the query portion of the API GET string.
76
+ #
77
+ def to_s
78
+ URI.encode_www_form self.to_hash
79
+ end
80
+ end
81
+
82
+ class <<self
83
+
84
+ # Set up the local directory where cached data and the installed key will be stored.
85
+ def setup_local_directory!
86
+ unless Dir.exists? LOCAL_DATA_DIR
87
+ Dir.mkdir LOCAL_DATA_DIR
88
+ end
89
+
90
+ unless Dir.exists? CACHE_DIR
91
+ Dir.mkdir CACHE_DIR
92
+ end
93
+
94
+ end
95
+
96
+ # Writes the given key to a local file. If a key is installed, then you don't have to specify
97
+ # a key in your query.
98
+ #
99
+ def install_key!(key)
100
+ File.open INSTALLED_KEY_PATH, 'w' do |f|
101
+ f.write key
102
+ end
103
+ end
104
+
105
+ # Returns the currently installed key or +nil+ if no key is installed.
106
+ #
107
+ def installed_key
108
+ if File.exists? INSTALLED_KEY_PATH
109
+ File.read INSTALLED_KEY_PATH
110
+ else
111
+ nil
112
+ end
113
+ end
114
+
115
+ # Looks for the url basename in the cache directory. If it doesn't exist, then downloads the
116
+ # file from the web.
117
+ #
118
+ def get_cached_url(url)
119
+ local_file = File.join CACHE_DIR, File.basename(url)
120
+ if File.exists? local_file
121
+ File.read local_file
122
+ else
123
+ puts "Getting #{url}"
124
+ c = Curl::Easy.new url
125
+ c.perform
126
+ file_content = c.body_str
127
+
128
+ File.open local_file, 'w' do |f|
129
+ f.write file_content
130
+ end
131
+
132
+ file_content
133
+ end
134
+ end
135
+
136
+ end
137
+ end