scensus-utils 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -15,6 +15,7 @@ begin
15
15
 
16
16
  gem.add_dependency('GeoRuby', '>= 1.3.4')
17
17
  gem.add_dependency('json', '>= 1.2.0')
18
+ gem.add_dependency('progressbar', '>= 0.9.0')
18
19
 
19
20
  end
20
21
  Jeweler::GemcutterTasks.new
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.2.0
@@ -0,0 +1,391 @@
1
+ #--
2
+ #
3
+ # Utility for creating Google Maps Encoded GPolylines
4
+ # http://facstaff.unca.edu/mcmcclur/GoogleMaps/EncodePolyline/gmap_polyline_encoder.rb.txt
5
+ #
6
+ # License: You may distribute this code under the same terms as Ruby itself
7
+ #
8
+ # Author: Joel Rosenberg
9
+ #
10
+ # ( Drawing from the official example pages as well as Mark McClure's work )
11
+ #
12
+ # == Example
13
+ #
14
+ # data = [
15
+ # [ 37.4419, -122.1419],
16
+ # [ 37.4519, -122.1519],
17
+ # [ 37.4619, -122.1819],
18
+ # ]
19
+ #
20
+ # encoder = GMapPolylineEncoder.new()
21
+ # result = encoder.encode( data )
22
+ #
23
+ # javascript << " var myLine = new GPolyline.fromEncoded({\n"
24
+ # javascript << " color: \"#FF0000\",\n"
25
+ # javascript << " weight: 10,\n"
26
+ # javascript << " opacity: 0.5,\n"
27
+ # javascript << " zoomFactor: #{result[:zoomFactor]},\n"
28
+ # javascript << " numLevels: #{result[:numLevels]},\n"
29
+ # javascript << " points: \"#{result[:points]}\",\n"
30
+ # javascript << " levels: \"#{result[:levels]}\"\n"
31
+ # javascript << " });"
32
+ #
33
+ # == Methods
34
+ #
35
+ # Constructor args (all optional):
36
+ # :numLevels (default 18)
37
+ # :zoomFactor (default 2)
38
+ # :reduce: Reduce points (default true)
39
+ # :escape: Escape backslashes (default true)
40
+ #
41
+ # encode( points ) method
42
+ # points (required): array of [latitude, longitude] pairs
43
+ #
44
+ # returns hash with keys :points, :levels, :zoomFactor, :numLevels
45
+ #
46
+ # == Background
47
+ #
48
+ # Description: http://www.google.com/apis/maps/documentation/#Encoded_Polylines
49
+ # API: http://www.google.com/apis/maps/documentation/reference.html#GPolyline
50
+ # Hints: http://www.google.com/apis/maps/documentation/polylinealgorithm.html
51
+ #
52
+ # Example Javascript for instantiating an encoded polyline:
53
+ # var encodedPolyline = new GPolyline.fromEncoded({
54
+ # color: "#FF0000",
55
+ # weight: 10,
56
+ # points: "yzocFzynhVq}@n}@o}@nzD",
57
+ # levels: "BBB",
58
+ # zoomFactor: 32,
59
+ # numLevels: 4
60
+ # });
61
+ #
62
+ # == Changes
63
+ #
64
+ # 06.29.2007 - Release 0.1
65
+ # Profiling showed that distance() accounted for 50% of the time when
66
+ # processing McClure's British coast data. By moving the distance
67
+ # calculation into encode(), we can cache a few of the calculations
68
+ # (magnitude) and eliminate the overhead of the function call. This
69
+ # reduced the time to encode by ~ 30%
70
+ #
71
+ # 06.21.2007 Implementing the Douglas-Peucker algorithm for removing superfluous
72
+ # points as per Mark McClure's design:
73
+ # http://facstaff.unca.edu/mcmcclur/GoogleMaps/EncodePolyline/
74
+ #
75
+ # 10.14.2006 Cleaned up (and finally grasped) zoom levels
76
+ #
77
+ # 09.2006 First port of the official example's javascript. Ignoring zoom
78
+ # levels for now, showing points at all zoom levels
79
+ #
80
+ #++
81
+
82
# Encodes a list of [latitude, longitude] pairs into the Google Maps
# "encoded polyline" format, optionally simplifying the line first with the
# Douglas-Peucker algorithm and assigning a zoom level to every kept point.
#
# Options accepted by the constructor (all optional):
#   :numLevels  - number of zoom levels to encode (default 18)
#   :zoomFactor - magnification between adjacent zoom levels (default 2)
#   :reduce     - drop points closer to the line than the threshold (default true)
#   :escape     - double every backslash in the encoded points (default true)
class GMapPolylineEncoder
  # zoomFactor and numLevels get hand-written writers below because changing
  # either one invalidates the cached zoom-level breaks.
  attr_accessor :reduce, :escape
  attr_reader :zoomFactor, :numLevels

  # The minimum distance from the line that a point must exceed to avoid
  # elimination under the Douglas-Peucker algorithm.
  @@dp_threshold = 0.00001

  # options - Hash of the optional settings described on the class.
  def initialize(options = {})
    @numLevels  = options.fetch(:numLevels, 18)
    @zoomFactor = options.fetch(:zoomFactor, 2)

    # Distance thresholds depend on both values above.
    calculate_zoom_breaks

    # Simplify and escape unless the caller explicitly says otherwise.
    @reduce = options.fetch(:reduce, true)
    @escape = options.fetch(:escape, true)
  end

  # Sets the number of zoom levels and recomputes the zoom breaks.
  def numLevels=(new_num_levels)
    @numLevels = new_num_levels
    calculate_zoom_breaks
  end

  # Sets the zoom factor and recomputes the zoom breaks.
  def zoomFactor=(new_zoom_factor)
    @zoomFactor = new_zoom_factor
    calculate_zoom_breaks
  end

  # Encodes +points+, an array of [latitude, longitude] pairs.
  #
  # Returns a hash with the keys :points and :levels (encoded strings, one
  # level character per encoded point) plus :zoomFactor and :numLevels.
  # An empty +points+ array yields empty :points and :levels strings.
  def encode(points)
    distances, max_dist = compute_point_distances(points)

    # Force line endpoints to be included. Guarded so that an empty input
    # does not grow the distances array.
    unless points.empty?
      distances[0] = max_dist
      distances[-1] = max_dist
    end

    points_enc = encode_points(points, distances)
    levels_enc = encode_levels(points, distances, max_dist)

    # Double each backslash so the string survives being embedded in a
    # quoted JavaScript literal. Levels are left untouched, as before.
    points_enc = points_enc.gsub(/\\/, '\\\\\\\\') if @escape

    {
      :points => points_enc,
      :levels => levels_enc,
      :zoomFactor => @zoomFactor,
      :numLevels => @numLevels,
    }
  end

  private

  # Runs an iterative (stack based) Douglas-Peucker pass over +points+.
  #
  # Returns [distances, max_dist]: distances[i] is the distance that caused
  # point i to be kept, or nil when the point never exceeded @@dp_threshold;
  # max_dist is the greatest distance measured during the run.
  #
  # The distance math is deliberately inlined (rather than calling
  # compute_distance) so the segment magnitude is computed once per segment.
  # Plain Euclidean geometry is used; it only serves as a rough comparison.
  def compute_point_distances(points)
    max_dist  = 0
    distances = Array.new(points.size)
    return [distances, max_dist] unless points.length > 2

    stack = [[0, points.size - 1]]

    until stack.empty?
      p1_idx, pn_idx = stack.pop
      pb_dist = 0
      pb_idx  = nil

      x1 = points[p1_idx][0]
      y1 = points[p1_idx][1]
      x2 = points[pn_idx][0]
      y2 = points[pn_idx][1]

      # Cached per segment for performance.
      magnitude = Math.sqrt((x2 - x1)**2 + (y2 - y1)**2)
      magnitude_squared = magnitude**2

      # Find the point farthest from the segment p1..pn.
      (p1_idx + 1).upto(pn_idx - 1) do |i|
        px = points[i][0]
        py = points[i][1]

        if magnitude == 0
          # The segment is really just a point.
          current_distance = Math.sqrt((x2 - px)**2 + (y2 - py)**2)
        else
          u = (((px - x1) * (x2 - x1)) + ((py - y1) * (y2 - y1))) / magnitude_squared

          if u <= 0 || u > 1
            # Closest to one of the endpoints; take the nearer one.
            to_start = Math.sqrt((x1 - px)**2 + (y1 - py)**2)
            to_end   = Math.sqrt((x2 - px)**2 + (y2 - py)**2)
            current_distance = to_start > to_end ? to_end : to_start
          else
            # The perpendicular foot lies on the segment.
            ix = x1 + u * (x2 - x1)
            iy = y1 + u * (y2 - y1)
            current_distance = Math.sqrt((ix - px)**2 + (iy - py)**2)
          end
        end

        if current_distance > pb_dist
          pb_dist = current_distance
          pb_idx  = i
        end
      end

      max_dist = pb_dist if pb_dist > max_dist

      if pb_dist > @@dp_threshold
        # Keep the farthest point and recurse on both halves. The distance
        # is recorded because it later determines the point's zoom level.
        distances[pb_idx] = pb_dist
        stack << [p1_idx, pb_idx]
        stack << [pb_idx, pn_idx]
      end
    end

    [distances, max_dist]
  end

  # Fills @zoom_level_breaks with the distance threshold of each zoom level,
  # largest first; the last entry equals @@dp_threshold.
  def calculate_zoom_breaks
    @zoom_level_breaks = (0...@numLevels).map do |i|
      @@dp_threshold * (@zoomFactor**(@numLevels - i - 1))
    end
    nil
  end

  # Encodes the kept points as deltas of latitude/longitude scaled by 1e5.
  # When @reduce is set, points whose distance is nil are skipped.
  def encode_points(points, distances)
    encoded = ""
    plat = 0
    plon = 0

    points.each_with_index do |point, i|
      next if @reduce && distances[i].nil?

      # Round (not floor): flooring truncates toward -infinity, which shifts
      # values such as -120.2 * 1e5 by one unit when the float product lands
      # a hair below the exact integer.
      late5 = (point[0] * 1e5).round
      lone5 = (point[1] * 1e5).round

      encoded << encodeSignedNumber(late5 - plat)
      encoded << encodeSignedNumber(lone5 - plon)

      plat = late5
      plon = lone5
    end

    encoded
  end

  # Encodes one zoom level per point emitted by encode_points.
  #
  # The first and last points are always forced to the top level. A point
  # that is kept only because @reduce is off (its distance is nil) receives
  # the lowest level instead of raising on a nil comparison.
  # +max_dist+ is accepted for interface compatibility and is not used.
  def encode_levels(points, distances, max_dist)
    encoded  = ""
    last_idx = points.size - 1
    deepest  = @numLevels - 1

    points.each_index do |i|
      if i == 0 || i == last_idx
        encoded << encodeNumber(@numLevels - 1)
        next
      end

      distance = distances[i]
      next if @reduce && distance.nil?

      if distance.nil?
        computed_level = deepest
      else
        computed_level = 0
        # Bounded so the index can never run past the end of the breaks.
        while computed_level < deepest && distance < @zoom_level_breaks[computed_level]
          computed_level += 1
        end
      end

      encoded << encodeNumber(@numLevels - computed_level - 1)
    end

    encoded
  end

  # Distance from +point+ to the segment lineStart..lineEnd (Euclidean).
  #
  # Note: encode() performs this computation inline for performance and
  # computation caching; this standalone version is kept for reference.
  def compute_distance(point, lineStart, lineEnd)
    px, py = point[0], point[1]
    x1, y1 = lineStart[0], lineStart[1]
    x2, y2 = lineEnd[0], lineEnd[1]

    magnitude = Math.sqrt((x2 - x1)**2 + (y2 - y1)**2)

    # Degenerate segment: both ends coincide.
    return Math.sqrt((x2 - px)**2 + (y2 - py)**2) if magnitude == 0

    u = (((px - x1) * (x2 - x1)) + ((py - y1) * (y2 - y1))) / (magnitude**2)

    if u <= 0 || u > 1
      # The point is closest to an endpoint; take the nearer one.
      to_start = Math.sqrt((x1 - px)**2 + (y1 - py)**2)
      to_end   = Math.sqrt((x2 - px)**2 + (y2 - py)**2)
      to_start > to_end ? to_end : to_start
    else
      # The perpendicular foot lies on the segment.
      ix = x1 + u * (x2 - x1)
      iy = y1 + u * (y2 - y1)
      Math.sqrt((ix - px)**2 + (iy - py)**2)
    end
  end

  # Encodes a signed integer: shift left one bit and invert when negative,
  # then encode the resulting non-negative value.
  def encodeSignedNumber(num)
    sgn_num = num << 1
    sgn_num = ~sgn_num if num < 0
    encodeNumber(sgn_num)
  end

  # Encodes a non-negative integer in 5-bit chunks, least significant first.
  # Every chunk except the last has 0x20 set as a continuation flag, and 63
  # is added to each chunk to produce its character code.
  def encodeNumber(num)
    encoded = ""

    while num >= 0x20
      encoded << ((0x20 | (num & 0x1f)) + 63).chr
      num >>= 5
    end

    encoded << (num + 63).chr
    encoded
  end
end
data/lib/scensus-utils.rb CHANGED
@@ -38,12 +38,14 @@ require 'rdoc/usage'
38
38
  require 'ostruct'
39
39
  require 'date'
40
40
 
41
+ require 'progressbar'
41
42
  require 'json'
43
+ require 'gmap_polyline_encoder.rb'
42
44
  require "geo_ruby"
43
45
  include GeoRuby::Shp4r
44
46
 
45
47
  class App
46
- VERSION = '0.1.0'
48
+ VERSION = '0.2.0'
47
49
 
48
50
  attr_reader :options
49
51
 
@@ -121,7 +123,7 @@ class App
121
123
  # TODO - implement your real logic here
122
124
  true if @arguments.length >= 1
123
125
 
124
- # TODO: Does the directory specified exist and have at least 1 shapefile?
126
+ # TODO: Does the output file already exist? Should we overwrite by default?
125
127
  end
126
128
 
127
129
  # Setup the arguments
@@ -146,44 +148,63 @@ class App
146
148
  # Loop over each shapefile specified on the command line
147
149
  # and process the shapes contained within
148
150
 
151
+ output_file = File.new(@options.output, "w+")
152
+
149
153
  puts "Finding geo shapes in the following files:"
150
154
 
151
155
  ARGV.each do |a|
152
- Dir.glob(a) do |shp|
153
- puts " #{a} ..."
156
+ Dir.glob(a) do |shp|
154
157
  shp_basename = File.basename(shp)
155
- if shp_basename =~ /(.*)\.shp/
156
- puts "\nImporting " + File.basename(shp)
157
- ShpFile.open(shp) do |shp|
158
- shp.each do |shape|
159
- # add the statefp00 + countyfp00 + tractce00 (if they exist) and see if there
160
- # is already a Place object representing this one.
161
-
158
+ if shp_basename =~ /(.*)\.shp$/
159
+ #puts " #{a} ..."
160
+
161
+ ShpFile.open(shp) do |shp|
162
+ pbar = ProgressBar.new(File.basename(a), shp.record_count, STDOUT)
163
+ pbar.format="%-14s %3d%% %s %s"
164
+
165
+ shp.each do |shape|
166
+ pbar.inc
162
167
  shape.data["STATEFP00"] ||= ""
163
168
  shape.data["COUNTYFP00"] ||= ""
164
169
  shape.data["TRACTCE00"] ||= ""
165
-
166
- fp_id = shape.data["STATEFP00"] + shape.data["COUNTYFP00"] + shape.data["TRACTCE00"]
167
-
168
- print "."
169
-
170
- # Loop over each field in this shapefile, and add
171
- shp.fields.each do |field|
172
- puts "#{field.name}"
173
- #new_place[field.name.downcase] = shape.data[field.name]
174
- end
175
-
176
- #new_place.the_geom = shape.geometry
170
+
171
+ # The FPID is useful for quickly narrowing down a search of the shapes
172
+ shape.data["FPID"] = shape.data["STATEFP00"] + shape.data["COUNTYFP00"] + shape.data["TRACTCE00"]
173
+
174
+ points_text = shape.geometry.text_representation
175
+ # Since the text_representation looks like this:
176
+ # ((-72.574763 42.579249,-72.574716 42.579381,-72.574707 42.579407))
177
+ # we need to do some data massaging to get it into the encoding class as an array
178
+
179
+ @geometry_points = Array.new
180
+ # Remove all parentheses, split on the comma, then split on the space. Reconstruct the array
181
+ @point_pairs = points_text.gsub("(", "").gsub(")", "").split(",")
182
+ @point_pairs.each do |p|
183
+ # split the string into an array. push that array onto the data array
184
+ @one_pair = p.split(" ")
185
+ # Convert to floats
186
+ @one_pair = @one_pair.map do |point|
187
+ point.to_f
188
+ end
189
+
190
+ @geometry_points.push(@one_pair.reverse)
191
+ end
192
+
193
+ encoder = GMapPolylineEncoder.new()
194
+ shape.data["POLYLINES"] = encoder.encode(@geometry_points)
195
+
196
+ output_file.syswrite(shape.data.to_json)
177
197
  end
178
198
  end
179
199
  end
180
200
  end
181
201
  end
202
+ puts "Done processing all shapefiles that were found."
182
203
  end
183
204
  end
184
205
 
185
206
 
186
- # TO DO - Add your Modules, Classes, etc
207
+ # TODO - Add your Modules, Classes, etc
187
208
 
188
209
 
189
210
  # Create and run the application
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: scensus-utils
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tony Wieczorek
@@ -42,6 +42,16 @@ dependencies:
42
42
  - !ruby/object:Gem::Version
43
43
  version: 1.2.0
44
44
  version:
45
+ - !ruby/object:Gem::Dependency
46
+ name: progressbar
47
+ type: :runtime
48
+ version_requirement:
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: 0.9.0
54
+ version:
45
55
  description: The US Census can be hard to digest for mere mortals. Geographic data is hidden away in shapefiles, a format unsupported by the freely available mapping sites like Google Maps and OpenStreetMap. Map servers, like GeoServer and MapServer have support for shapefiles, but those solutions are often too much for smaller organizations to set up and maintain. Scensus is a project to bring simple mapping of US Census data to the rest of us. Scensus-utils is a set of ruby scripts and files necessary to transform the census data in use for the Scensus project. You do not need to install Scensus-utils to run Scensus, but they are provided to foster further collaboration on the techniques and tools used to map.
46
56
  email: tonyjw@gmail.com
47
57
  executables: []
@@ -58,6 +68,7 @@ files:
58
68
  - README.rdoc
59
69
  - Rakefile
60
70
  - VERSION
71
+ - lib/gmap_polyline_encoder.rb
61
72
  - lib/scensus-utils.rb
62
73
  - test/helper.rb
63
74
  - test/test_scensus-utils.rb