rboc 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/rboc/data.rb ADDED
@@ -0,0 +1,77 @@
1
+ require 'json'
2
+
3
module Census

  # A result data set: one header row of column names plus the data rows
  # returned by the Census API. Rows are stored as arrays; iterating with
  # #each (and every Enumerable method) yields one Hash per row, keyed by
  # column name.
  #
  class Data

    include Enumerable

    attr_reader :colnames, :rows

    # Split a list of column names into geographic columns and data columns.
    # A column is geographic when its name is one of Geography::LEVELS.
    #
    # Returns a two-element array [geocolnames, datacolnames]; each list keeps
    # the relative order of the input.
    #
    def self.split_colnames(colnames)
      colnames.partition { |s| Geography::LEVELS.include? s }
    end

    # Constructs a new data object from Census data returned by the API. The
    # argument is either a JSON string or the already-parsed array, in the
    # format:
    #   [["column1", "column2", ...], [row11, row12, ...], [row21, row22, ...], ...]
    #
    # An empty payload (including the default argument) produces a data set
    # with no columns and no rows.
    #
    def initialize(json='[]')
      json = JSON.parse json if json.is_a? String
      @colnames, *@rows = *json
      # An empty array has no header row; without this the default argument
      # would leave @colnames nil and fail below.
      @colnames ||= []
      @colmap = Hash[@colnames.each_with_index.to_a]

      @geocolnames, @datacolnames = self.class.split_colnames @colnames
    end

    # Yields each row as a Hash mapping column name to value. Returns an
    # Enumerator when called without a block.
    #
    def each
      return enum_for(:each) unless block_given?

      @rows.each do |row|
        yield Hash[@colnames.zip(row)]
      end
    end

    # Merges an existing Census data set with additional data returned from
    # the API (a JSON string or parsed array in the same format accepted by
    # #initialize). Currently, this method assumes columns and rows are
    # returned in a consistent order given the same geography.
    #
    # The data columns of the new payload are appended to every row; the
    # geographic columns are only used to check that the rows line up.
    #
    # Raises ArgumentError ("Mismatched geographies") when the geographic
    # columns differ, and ArgumentError ("Mismatched rows") when the number of
    # rows differs or a row's geographic values differ. The receiver is left
    # untouched when an error is raised.
    #
    # Returns self.
    #
    def merge!(json)
      json = JSON.parse json if json.is_a? String
      colnames, *rows = *json
      colnames ||= []
      colmap = Hash[colnames.each_with_index.to_a]
      geocolnames, datacolnames = self.class.split_colnames colnames

      if geocolnames != @geocolnames
        raise ArgumentError, "Mismatched geographies"
      end

      if rows.length != @rows.length
        raise ArgumentError, "Mismatched rows"
      end

      # Build the merged rows on the side so a mismatch found part-way through
      # cannot leave @rows half-updated.
      merged = @rows.each_with_index.map do |row, i|
        other = rows[i]
        if @geocolnames.any? { |s| row[@colmap[s]] != other[colmap[s]] }
          raise ArgumentError, "Mismatched rows"
        end

        row + datacolnames.map { |s| other[colmap[s]] }
      end

      n = @colnames.length
      # Replace the contents in place so references obtained through #rows
      # keep pointing at the live row list.
      @rows.replace merged
      datacolnames.each_with_index { |s, i| @colmap[s] = n + i }
      @colnames += datacolnames
      @datacolnames += datacolnames

      self
    end

  end
end
data/lib/rboc/geo.rb ADDED
@@ -0,0 +1,61 @@
1
require 'uri'

module Census

  # A Census geography: the summary level being requested (the API's "for"
  # field) and, optionally, the geographies that contain it (the "in" field).
  #
  class Geography
    # Column / level names treated as geographic.
    LEVELS = [
      'us', 'region', 'division', 'state', 'county', 'tract'
    ]

    # Plural spellings accepted by #summary_level= and the level each maps to.
    LEVEL_ALIAS = {
      'regions' => 'region',
      'divisions' => 'division',
      'states' => 'state',
      'counties' => 'county',
      'tracts' => 'tract',
    }

    attr_reader :summary_level
    attr_accessor :contained_in

    def initialize
      @summary_level = {}
      @contained_in = {}
    end

    # Sets the summary level to the specified value, replacing any level set
    # earlier. If 'lvl' is a hash, it should only contain one element
    # (level => value); otherwise 'lvl' is a level name and the value
    # defaults to '*'. Plural aliases listed in LEVEL_ALIAS are translated,
    # whether given as strings or symbols.
    #
    def summary_level=(lvl)
      if lvl.is_a? Hash
        k, v = lvl.first
      else
        k, v = lvl, '*'
      end

      k = LEVEL_ALIAS[k.to_s] || k
      # Replace rather than add: #to_hash only reads the first entry, so a
      # leftover entry from an earlier call would shadow the new level.
      @summary_level.replace(k => v)
    end

    # Returns the geography as a hash of API fields: always 'for', plus 'in'
    # when #contained_in is not empty. With no summary level set, 'for'
    # defaults to 'us:*'. This method does not modify the geography.
    #
    def to_hash
      k, v = @summary_level.first || ['us', '*']
      h = { 'for' => "#{k}:#{v}" }

      unless @contained_in.empty?
        # NOTE(review): the '+' separator is percent-encoded by #to_s
        # (URI.encode_www_form); confirm the API accepts that form.
        h['in'] = @contained_in.map { |name, value| "#{name}:#{value}" }.join("+")
      end

      h
    end

    # Returns the geography portion of the API GET string.
    #
    def to_s
      URI.encode_www_form to_hash
    end
  end

end
data/lib/rboc.rb ADDED
@@ -0,0 +1,226 @@
1
+ require 'curb'
2
+ require 'json'
3
+ require 'uri'
4
+
5
+ require 'rboc/geo'
6
+ require 'rboc/data'
7
+
8
+ # A module defining methods for accessing the U.S. Census data API.
9
+ #
10
+ module Census
11
+
12
# Base URL of the Census data API.
#
API_URL = 'http://api.census.gov/data'

# Path to the installed API key relative to this file.
#
INSTALLED_KEY_REL_PATH = '../data/installed_key'

# Path to the installed API key.
#
INSTALLED_KEY_PATH = File.join(File.dirname(File.expand_path(__FILE__)), INSTALLED_KEY_REL_PATH)

# Data files accessible through the Census API. One pair of module-level
# accessors (name and name_raw) is generated for each entry.
#
FILES = ['acs1', 'acs1_cd', 'acs3', 'acs5', 'sf1', 'sf3']

# List valid years of data for each data file. The first year listed for a
# file is the default year of that file's generated accessors.
#
FILE_VALID_YEARS = {
  'acs1' => [2012],
  'acs1_cd' => [2011],
  'acs3' => [2012],
  'acs5' => [2011, 2010],
  'sf1' => [2010, 2000, 1990],
  'sf3' => [2000, 1990]
}

# Path segment to use in the request URL for files whose URL path differs
# from the file name. Files without an entry use the file name itself.
#
FILE_URL_SUBST = {
  'acs1' => 'acs1/profile',
  'acs3' => 'acs3/profile'
}

# Base class of every error raised by this module; also raised directly for
# unexpected HTTP response codes.
class CensusApiError < StandardError; end
# Raised on HTTP 400, or when the API answers with a descriptive hash
# instead of data.
class InvalidQueryError < CensusApiError; end
# Raised on an HTTP 302 whose headers mention "missing_key" or "invalid_key".
class InvalidKeyError < CensusApiError; end
# Raised on HTTP 204.
class NoMatchingRecordsError < CensusApiError; end
# Raised on HTTP 500.
class ServerSideError < CensusApiError; end
49
+
50
# A class representing a query to the Census API: the variables to fetch, the
# geography to fetch them for, and the API key to send.
#
class Query
  attr_accessor :variables, :geo
  attr_writer :api_key

  def initialize
    @variables = []
    @geo = Geography.new
  end

  # Returns the API key to be used for this query. If the key hasn't been set explicitly, this
  # method attempts to load a key previously installed by Census#install_key!.
  #
  def api_key
    @api_key ||= Census.installed_key
  end

  # these chainable methods mirror the field names in the HTTP get string

  # Sets the variables to request. Accepts either a list of names or a single
  # array of names; nested arrays are flattened so that #[] slices by
  # variable. Returns self.
  #
  def get(*vars)
    @variables = vars.flatten
    self
  end

  # Sets the summary level of the query's geography. Returns self.
  #
  def for(level)
    @geo.summary_level = level
    self
  end

  # Sets the containing geographies of the query's geography. Returns self.
  #
  def in(container)
    @geo.contained_in = container
    self
  end

  # Sets the API key. Returns self.
  #
  def key(key)
    @api_key = key
    self
  end

  # Constructs a new Query object with a subset of variables. Creates a shallow copy of this
  # Query's geography and api key. 'rng' is anything accepted by the
  # variables' own #[]; the new query's variables are always an array, even
  # for a single integer index or an out-of-range request.
  #
  def [](rng)
    q = Query.new
    q.variables = Array(@variables[rng])
    q.geo = @geo
    q.api_key = @api_key
    q
  end

  # Returns the query as a hash of API fields: 'key' (only when a key is
  # available), the geography fields, and 'get' (array variables joined with
  # commas).
  #
  def to_hash
    h = {}
    key = api_key
    # Leave the field out entirely when there is no key; a nil value would be
    # serialised as a dangling "key" with no value.
    h['key'] = key unless key.nil?
    h.merge! geo.to_hash

    v = @variables
    v = v.join(',') if v.is_a? Array
    h['get'] = v

    h
  end

  # Returns the query portion of the API GET string.
  #
  def to_s
    URI.encode_www_form to_hash
  end
end
123
+
124
class <<self

  # Writes the given key to a local file. If a key is installed, then you don't have to specify
  # a key in your query.
  #
  def install_key!(key)
    File.open(INSTALLED_KEY_PATH, 'w') { |f| f.write key }
  end

  # Returns the contents of the installed key file, or nil when no key has
  # been installed.
  #
  def installed_key
    # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
    File.read INSTALLED_KEY_PATH if File.exist? INSTALLED_KEY_PATH
  end

  # Constructs the URL needed to perform the query on the given file.
  # 'url_file' is the path segment to use in place of the file name (nil to
  # use the file name). When a block is given it is passed the query before
  # the URL is built.
  #
  # Raises ArgumentError when the file is unknown or the year is not valid
  # for it.
  #
  def api_url(year, file, url_file, query)
    year = year.to_i
    valid_years = FILE_VALID_YEARS[file]
    raise ArgumentError, "Unknown file '#{file}'" if valid_years.nil?
    unless valid_years.include? year
      raise ArgumentError, "Invalid year '#{year}' for file '#{file}'"
    end

    url_file ||= file
    yield query if block_given?
    [API_URL, year.to_s, "#{url_file}?#{query.to_s}"].join('/')
  end

  # Accesses the data api and returns the unmodified body of the HTTP response. Raises errors
  # if the HTTP response code indicates a problem:
  #   400 -> InvalidQueryError, 204 -> NoMatchingRecordsError,
  #   500 -> ServerSideError, 302 mentioning a missing/invalid key ->
  #   InvalidKeyError, anything else -> CensusApiError.
  #
  def api_raw(year, file, url_file, query)
    yield query if block_given?
    url = api_url year, file, url_file, query
    # NOTE(review): this prints the full URL, API key included, to stdout.
    puts "GET #{url}"

    c = Curl::Easy.new url
    c.perform
    r = c.response_code

    case r
    when 200
      return c.body_str
    when 400
      raise InvalidQueryError
    when 204
      raise NoMatchingRecordsError
    when 500
      raise ServerSideError
    when 302
      # NOTE(review): relies on Curl::Easy#head returning the response
      # headers as a string -- confirm against the curb version in use.
      if c.head.include?("missing_key") || c.head.include?("invalid_key")
        raise InvalidKeyError
      end
    end

    raise CensusApiError, "Unexpected HTTP response code: #{r}"
  end

  # Accesses the data api and parses the result into a Census::Data object.
  # Variables are requested 50 at a time and the partial results merged.
  #
  # Raises InvalidQueryError when any response is a descriptive hash rather
  # than a table of data.
  #
  def api_data(year, file, url_file, query)
    yield query if block_given?

    # Downloads and parses the block of (up to) 50 variables starting at the
    # given offset.
    fetch = lambda do |start|
      json = JSON.parse api_raw(year, file, url_file, query[start...(start + 50)])

      # sometimes the API returns a descriptive hash (in a single element array) if the
      # requested columns are invalid
      raise InvalidQueryError if json.is_a?(Hash) || json.first.is_a?(Hash)

      json
    end

    # download the first 50 or fewer variables
    d = Data.new fetch.call(0)

    # download remaining variables 50 at a time; strictly-less-than so an
    # exact multiple of 50 does not trigger a request with no variables
    offset = 50
    while offset < query.variables.length
      d.merge! fetch.call(offset)
      offset += 50
    end

    d
  end

end
210
+
211
# Defines the pair of module-level accessors for one data file: a method
# named after the file that returns parsed data (via api_data) and a
# "<file>_raw" method that returns the raw response body (via api_raw). Both
# take the keyword arguments 'year' (defaults to the first valid year of the
# file) and 'query' (defaults to a new Query), and pass an optional block
# through.
#
def self.api_call(file, url_file)
  define_singleton_method(file) do |year: FILE_VALID_YEARS[file].first, query: Query.new, &block|
    api_data(year, file, url_file, query, &block)
  end

  raw_name = file + '_raw'
  define_singleton_method(raw_name) do |year: FILE_VALID_YEARS[file].first, query: Query.new, &block|
    api_raw(year, file, url_file, query, &block)
  end
end

# Generate the accessors for every known data file.
FILES.each { |f| api_call(f, FILE_URL_SUBST[f]) }
225
+
226
+ end
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rboc
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Joshua Tokle
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-12-16 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: curb
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 0.8.5
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.8.5
27
+ - !ruby/object:Gem::Dependency
28
+ name: json
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 1.8.1
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 1.8.1
41
+ description: An interface to the API provided by the U.S. Census Bureau
42
+ email: jtokle@gmail.com
43
+ executables: []
44
+ extensions: []
45
+ extra_rdoc_files: []
46
+ files:
47
+ - lib/rboc.rb
48
+ - lib/rboc/geo.rb
49
+ - lib/rboc/data.rb
50
+ - data/acs_1yr_profile_2012.xml
51
+ homepage: http://github.com/jotok/rboc
52
+ licenses:
53
+ - Public Domain
54
+ metadata: {}
55
+ post_install_message:
56
+ rdoc_options: []
57
+ require_paths:
58
+ - lib
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
64
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ requirements: []
70
+ rubyforge_project:
71
+ rubygems_version: 2.0.14
72
+ signing_key:
73
+ specification_version: 4
74
+ summary: An interface to the API provided by the U.S. Census Bureau
75
+ test_files: []