geomodel 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,467 @@
1
module Geomodel
  # Defines the notion of 'geocells' and exposes methods to operate on them.
  #
  # A geocell is a hexadecimal string that defines a two dimensional rectangular
  # region inside the [-90,90] x [-180,180] latitude/longitude space. A geocell's
  # 'resolution' is its length. For most practical purposes, at high resolutions,
  # geocells can be treated as single points.
  #
  # Much like geohashes [see http://en.wikipedia.org/wiki/Geohash], geocells are
  # hierarchical, in that any prefix of a geocell is considered its ancestor, with
  # geocell[0...-1] being geocell's immediate parent cell.
  #
  # To calculate the rectangle of a given geocell string, first divide the
  # [-90,90] x [-180,180] latitude/longitude space evenly into a 4x4 grid like so:
  #
  #              +---+---+---+---+ [90, 180]
  #              | a | b | e | f |
  #              +---+---+---+---+
  #              | 8 | 9 | c | d |
  #              +---+---+---+---+
  #              | 2 | 3 | 6 | 7 |
  #              +---+---+---+---+
  #              | 0 | 1 | 4 | 5 |
  #   [-90,-180] +---+---+---+---+
  #
  # NOTE: The point [0, 0] is at the intersection of grid cells 3, 6, 9 and c. And,
  # for example, cell 7 should be the sub-rectangle from
  # [-45, 90] to [0, 180].
  #
  # Calculate the sub-rectangle for the first character of the geocell string and
  # re-divide this sub-rectangle into another 4x4 grid. For example, if the geocell
  # string is '78a', we will re-divide the sub-rectangle like so:
  #
  #                .                   .
  #                .                   .
  #            . . +----+----+----+----+ [0, 180]
  #                | 7a | 7b | 7e | 7f |
  #                +----+----+----+----+
  #                | 78 | 79 | 7c | 7d |
  #                +----+----+----+----+
  #                | 72 | 73 | 76 | 77 |
  #                +----+----+----+----+
  #                | 70 | 71 | 74 | 75 |
  #   . . [-45,90] +----+----+----+----+
  #                .                   .
  #                .                   .
  #
  # Continue to re-divide into sub-rectangles and 4x4 grids until the entire
  # geocell string has been exhausted. The final sub-rectangle is the rectangular
  # region for the geocell.
  #
  module GeoCell
    # Geocell algorithm constants.
    GEOCELL_GRID_SIZE = 4
    GEOCELL_ALPHABET = '0123456789abcdef'.freeze

    # The maximum *practical* geocell resolution.
    MAX_GEOCELL_RESOLUTION = 13

    # The maximum number of geocells to consider for a bounding box search.
    MAX_FEASIBLE_BBOX_SEARCH_CELLS = 300

    # Direction enumerations, as [x, y] steps (x: -1 west / 1 east,
    # y: -1 south / 1 north).
    NORTHWEST = [-1, 1].freeze
    NORTH = [0, 1].freeze
    NORTHEAST = [1, 1].freeze
    EAST = [1, 0].freeze
    SOUTHEAST = [1, -1].freeze
    SOUTH = [0, -1].freeze
    SOUTHWEST = [-1, -1].freeze
    WEST = [-1, 0].freeze

    # Returns the geocells containing the given point at every resolution from
    # 1 up to MAX_GEOCELL_RESOLUTION, ordered from coarsest to finest.
    #
    # Args:
    #   point: The point to compute the cells for (see .compute).
    #
    # Returns:
    #   An Array of MAX_GEOCELL_RESOLUTION geocell strings.
    #
    def self.generate_geocells(point)
      # Every lower-resolution cell is a prefix of the max-resolution cell, so
      # the expensive computation only has to run once.
      geocell_max = compute(point, MAX_GEOCELL_RESOLUTION)

      (1..MAX_GEOCELL_RESOLUTION).map { |resolution| geocell_max[0...resolution] }
    end

    # Returns an efficient set of geocells to search in a bounding box query.
    #
    # This method is guaranteed to return a set of geocells having the same
    # resolution.
    #
    # Args:
    #   bbox: An object responding to north_east and south_west, each returning
    #     a point accepted by .compute.
    #   cost_function: An object responding to call(num_cells, resolution),
    #     where
    #       * num_cells: the number of cells to search
    #       * resolution: the resolution of each cell to search
    #     and returning the 'cost' of querying against this number of cells
    #     at the given resolution.
    #
    # Returns:
    #   A sorted Array of geocell strings that contain the given box, or nil if
    #   no resolution produced a feasible cell set.
    #
    def self.best_bbox_search_cells(bbox, cost_function)
      cell_ne = compute(bbox.north_east, MAX_GEOCELL_RESOLUTION)
      cell_sw = compute(bbox.south_west, MAX_GEOCELL_RESOLUTION)

      # The current lowest BBOX-search cost found.
      min_cost = Float::INFINITY

      # The set of cells having the lowest calculated BBOX-search cost.
      min_cost_cell_set = nil

      # First find the common prefix, if there is one. This is the base
      # resolution: coarser cells than this never need to be considered.
      min_resolution = common_prefix([cell_sw, cell_ne]).size

      # Iteratively calculate all possible sets of cells that wholly contain
      # the requested bounding box. The range stops at MAX_GEOCELL_RESOLUTION
      # because the corner cells are no longer than that.
      (min_resolution..MAX_GEOCELL_RESOLUTION).each do |cur_resolution|
        cur_ne = cell_ne[0...cur_resolution]
        cur_sw = cell_sw[0...cur_resolution]

        next if interpolation_count(cur_ne, cur_sw) > MAX_FEASIBLE_BBOX_SEARCH_CELLS

        cell_set = interpolate(cur_ne, cur_sw).sort
        cost = cost_function.call(cell_set.size, cur_resolution)

        # TODO(romannurik): See if this resolution is even possible, as in the
        # future cells at certain resolutions may not be stored.
        #
        # Once the cost starts rising, we won't be able to do better, so abort.
        break unless cost <= min_cost

        min_cost = cost
        min_cost_cell_set = cell_set
      end

      min_cost_cell_set
    end

    # Determines whether the given cells are collinear along a dimension.
    #
    # Only the characters both cells have in common (up to the shorter cell's
    # length) are compared.
    #
    # Args:
    #   cell1: The first geocell string.
    #   cell2: The second geocell string.
    #   column_test: When truthy, tests whether the cells are in the same
    #     column; when falsy, tests whether they are in the same row.
    #
    # Returns:
    #   true if the given cells are collinear in the given dimension, false
    #   otherwise.
    #
    def self.collinear(cell1, cell2, column_test)
      shared_length = [cell1.size, cell2.size].min

      (0...shared_length).all? do |i|
        x1, y1 = subdiv_xy(cell1[i])
        x2, y2 = subdiv_xy(cell2[i])

        # Same column means identical x's at every level; same row means
        # identical y's at every level.
        column_test ? x1 == x2 : y1 == y2
      end
    end

    # Calculates the grid of cells formed between the two given cells.
    #
    # Generates the set of cells in the grid created by interpolating from the
    # given Northeast geocell to the given Southwest geocell.
    #
    # Assumes the Northeast geocell is actually Northeast of Southwest geocell.
    # If it is not, interpolation stops at the top edge of the grid instead of
    # looping forever.
    #
    # Arguments:
    #   cell_ne: The Northeast geocell string.
    #   cell_sw: The Southwest geocell string.
    #
    # Returns:
    #   An Array of geocell strings in the interpolation, row by row starting
    #   from the Southwest cell.
    #
    def self.interpolate(cell_ne, cell_sw)
      # 2D array (rows of cells), will later be flattened.
      cell_set = [[cell_sw]]

      # First get adjacent geocells eastwards until the Southeast corner;
      # being in the same column as the Northeast cell means we are there.
      until collinear(cell_set.first.last, cell_ne, true)
        cell_tmp = adjacent(cell_set.first.last, EAST)
        break if cell_tmp.nil?

        cell_set.first << cell_tmp
      end

      # Then get adjacent geocells upwards, one whole row at a time.
      until cell_set.last.last == cell_ne
        cell_tmp_row = cell_set.last.map { |g| adjacent(g, NORTH) }
        # A nil here means we ran off the top of the grid.
        break if cell_tmp_row.first.nil?

        cell_set << cell_tmp_row
      end

      # Flatten cell_set, since it's currently a 2D array.
      cell_set.flatten
    end

    # Computes the number of cells in the grid formed between two given cells.
    #
    # Computes the number of cells in the grid created by interpolating from the
    # given Northeast geocell to the given Southwest geocell. Assumes the Northeast
    # geocell is actually Northeast of Southwest geocell.
    #
    # Arguments:
    #   cell_ne: The Northeast geocell string.
    #   cell_sw: The Southwest geocell string.
    #
    # Returns:
    #   An Integer, indicating the number of geocells in the interpolation.
    #
    def self.interpolation_count(cell_ne, cell_sw)
      bbox_ne = compute_box(cell_ne)
      bbox_sw = compute_box(cell_sw)

      cell_lat_span = bbox_sw.north - bbox_sw.south
      cell_lon_span = bbox_sw.east - bbox_sw.west

      num_cols = ((bbox_ne.east - bbox_sw.west) / cell_lon_span).to_i
      num_rows = ((bbox_ne.north - bbox_sw.south) / cell_lat_span).to_i

      num_cols * num_rows
    end

    # Calculates all of the given geocell's adjacent geocells.
    #
    # Args:
    #   cell: The geocell string for which to calculate adjacent/neighboring cells.
    #
    # Returns:
    #   An Array of 8 geocell strings and/or nil values, in the order
    #   NORTHWEST, NORTH, NORTHEAST, EAST, SOUTHEAST, SOUTH, SOUTHWEST, WEST.
    #
    def self.all_adjacents(cell)
      [NORTHWEST, NORTH, NORTHEAST, EAST, SOUTHEAST, SOUTH, SOUTHWEST, WEST].map do |direction|
        adjacent(cell, direction)
      end
    end

    # Calculates the geocell adjacent to the given cell in the given direction.
    #
    # Args:
    #   cell: The geocell string whose neighbor is being calculated.
    #   dir: An [x, y] pair indicating direction, where x and y can be -1, 0, or 1.
    #     -1 corresponds to West for x and South for y, and
    #      1 corresponds to East for x and North for y.
    #     Available helper constants are NORTH, EAST, SOUTH, WEST,
    #     NORTHEAST, NORTHWEST, SOUTHEAST, and SOUTHWEST.
    #
    # Returns:
    #   The geocell adjacent to the given cell in the given direction, or nil if
    #   there is no such cell (the cell is nil, or the step would wrap
    #   vertically past the top or bottom of the grid).
    #
    def self.adjacent(cell, dir)
      return nil if cell.nil?

      dx, dy = dir[0], dir[1]
      cell_adj_arr = cell.chars
      i = cell_adj_arr.size - 1

      # Walk from the finest level towards the coarsest, carrying the step
      # into the parent cell whenever we fall off an edge.
      while i >= 0 && (dx != 0 || dy != 0)
        x, y = subdiv_xy(cell_adj_arr[i])

        x, dx = shift_coordinate(x, dx)
        y, dy = shift_coordinate(y, dy)

        cell_adj_arr[i] = subdiv_char([x, y])
        i -= 1
      end

      # If we're not done with y then it's trying to wrap vertically,
      # which is a failure.
      return nil if dy != 0

      # At this point, horizontal wrapping is done inherently.
      cell_adj_arr.join
    end

    # Returns whether or not the given cell contains the given point.
    def self.contains_point(cell, point)
      compute(point, cell.size) == cell
    end

    # Returns the shortest distance between a point and a geocell bounding box.
    #
    # If the point is inside the cell, the shortest distance is always to an 'edge'
    # of the cell rectangle. If the point is outside the cell, the shortest distance
    # will be to either an 'edge' or 'corner' of the cell rectangle.
    #
    # Distances are measured with Geocoder::GeoMath.distance between the point
    # and Geomodel::Types::Point instances built as Point.new(lat, lon).
    #
    # Returns:
    #   The shortest distance from the point to the geocell's rectangle, in the
    #   unit Geocoder::GeoMath.distance returns.
    #   NOTE(review): the original documentation says meters - confirm against
    #   Geocoder::GeoMath.
    #
    def self.point_distance(cell, point)
      bbox = compute_box(cell)
      lat = point_latitude(point)
      lon = point_longitude(point)

      between_w_e = bbox.west <= lon && lon <= bbox.east
      between_n_s = bbox.south <= lat && lat <= bbox.north

      candidates =
        if between_w_e && between_n_s
          # Inside the geocell: project onto all four edges.
          [[bbox.south, lon], [bbox.north, lon], [lat, bbox.east], [lat, bbox.west]]
        elsif between_w_e
          # Directly north or south of the cell.
          [[bbox.south, lon], [bbox.north, lon]]
        elsif between_n_s
          # Directly east or west of the cell.
          [[lat, bbox.east], [lat, bbox.west]]
        else
          # Diagonal from the cell: one of the corners is nearest.
          # TODO(romannurik): optimize
          [[bbox.south, bbox.east], [bbox.north, bbox.east],
           [bbox.south, bbox.west], [bbox.north, bbox.west]]
        end

      candidates.map do |candidate_lat, candidate_lon|
        Geocoder::GeoMath.distance(point, Geomodel::Types::Point.new(candidate_lat, candidate_lon))
      end.min
    end

    # Computes the geocell containing the given point to the given resolution.
    #
    # This is a simple 16-tree lookup to an arbitrary depth (resolution).
    # Coordinates outside the [-90,90] x [-180,180] space are clamped to the
    # nearest edge cell.
    #
    # Args:
    #   point: The point to compute the cell for; must respond to
    #     latitude/longitude (or lat/lon).
    #   resolution: An Integer indicating the resolution of the cell to compute.
    #
    # Returns:
    #   The geocell string containing the given point, of length <resolution>.
    #
    def self.compute(point, resolution = MAX_GEOCELL_RESOLUTION)
      lat = point_latitude(point)
      lon = point_longitude(point)

      north = 90.0
      south = -90.0
      east = 180.0
      west = -180.0
      max_index = GEOCELL_GRID_SIZE - 1

      cell = ''
      while cell.size < resolution
        subcell_lon_span = (east - west) / GEOCELL_GRID_SIZE
        subcell_lat_span = (north - south) / GEOCELL_GRID_SIZE

        # Clamp to the grid so points on the far edge (or outside the space)
        # still land in a valid sub-cell.
        x = (GEOCELL_GRID_SIZE * (lon - west) / (east - west)).to_i
        x = [[x, 0].max, max_index].min
        y = (GEOCELL_GRID_SIZE * (lat - south) / (north - south)).to_i
        y = [[y, 0].max, max_index].min

        cell += subdiv_char([x, y])

        # Narrow the working rectangle to the chosen sub-cell.
        south += subcell_lat_span * y
        north = south + subcell_lat_span

        west += subcell_lon_span * x
        east = west + subcell_lon_span
      end

      cell
    end

    # Computes the rectangular boundaries (bounding box) of the given geocell.
    #
    # Args:
    #   cell: The geocell string whose boundaries are to be computed.
    #
    # Returns:
    #   A Geomodel::Types::Box (built as Box.new(north, east, south, west))
    #   corresponding to the rectangular boundaries of the geocell, or nil if
    #   cell is nil.
    #
    def self.compute_box(cell)
      return nil if cell.nil?

      bbox = Geomodel::Types::Box.new(90.0, 180.0, -90.0, -180.0)

      cell.each_char do |char|
        subcell_lon_span = (bbox.east - bbox.west) / GEOCELL_GRID_SIZE
        subcell_lat_span = (bbox.north - bbox.south) / GEOCELL_GRID_SIZE

        x, y = subdiv_xy(char)

        bbox = Geomodel::Types::Box.new(bbox.south + subcell_lat_span * (y + 1),
                                        bbox.west + subcell_lon_span * (x + 1),
                                        bbox.south + subcell_lat_span * y,
                                        bbox.west + subcell_lon_span * x)
      end

      bbox
    end

    # Returns whether or not the given geocell string defines a valid geocell,
    # i.e. it is non-nil, non-empty and made only of GEOCELL_ALPHABET characters.
    def self.is_valid(cell)
      !cell.nil? &&
        cell.size > 0 &&
        cell.each_char.all? { |c| GEOCELL_ALPHABET.include?(c) }
    end

    # Calculates the immediate children of the given geocell.
    #
    # For example, the immediate children of 'a' are 'a0', 'a1', ..., 'af'.
    #
    def self.children(cell)
      GEOCELL_ALPHABET.each_char.map { |chr| cell + chr }
    end

    # Returns the [x, y] of the geocell character in the 4x4 alphabet grid.
    # The character must be part of GEOCELL_ALPHABET.
    # NOTE: This only works for grid size 4.
    def self.subdiv_xy(char)
      index = GEOCELL_ALPHABET.index(char)
      # Bits 0 and 2 of the index hold x; bits 1 and 3 hold y.
      [(index & 4) >> 1 | (index & 1) >> 0, (index & 8) >> 2 | (index & 2) >> 1]
    end

    # Returns the geocell character in the 4x4 alphabet grid at pos. [x, y].
    # NOTE: This only works for grid size 4.
    def self.subdiv_char(pos)
      x, y = pos[0], pos[1]
      GEOCELL_ALPHABET[(y & 2) << 2 | (x & 2) << 1 | (y & 1) << 1 | (x & 1) << 0]
    end

    # Returns the longest prefix shared by every string in the list, or an
    # empty string when the list is empty or nothing is shared.
    def self.common_prefix(list)
      return '' if list.empty?

      # The prefix common to the lexicographically smallest and largest
      # strings is common to everything in between.
      lowest, highest = list.minmax
      length = 0
      length += 1 while length < lowest.size && lowest[length] == highest[length]

      lowest[0, length]
    end

    # Reads a point's latitude, accepting either a latitude or a lat accessor.
    def self.point_latitude(point)
      point.respond_to?(:latitude) ? point.latitude : point.lat
    end

    # Reads a point's longitude, accepting either a longitude or a lon accessor.
    def self.point_longitude(point)
      point.respond_to?(:longitude) ? point.longitude : point.lon
    end

    # Moves one grid coordinate a single step in the direction of delta.
    # Returns [new_value, remaining_delta]: the delta stays set when the step
    # crossed into the neighbouring parent cell and must be carried upwards.
    def self.shift_coordinate(value, delta)
      last_index = GEOCELL_GRID_SIZE - 1

      case delta
      when -1
        value == 0 ? [last_index, delta] : [value - 1, 0]
      when 1
        value == last_index ? [0, delta] : [value + 1, 0]
      else
        [value, delta]
      end
    end

    private_class_method :point_latitude, :point_longitude, :shift_coordinate
  end
end